From: Baikal Electronics <support@baikalelectronics.ru>
Date: Wed, 5 Jul 2023 11:01:33 +0000 (+0300)
Subject: SDK 5.10
X-Git-Tag: baikal/aarch64/sdk5.10
X-Git-Url: https://git.baikalelectronics.ru/sdk/?a=commitdiff_plain;h=783d1b6bbf422e587eb3362009516fa4c5818299;p=kernel.git

SDK 5.10
---

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 967e78c5ec0a1..bf0b41d70bfa6 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -137,6 +137,8 @@ patternProperties:
     description: Azoteq (Pty) Ltd
   "^azw,.*":
     description: Shenzhen AZW Technology Co., Ltd.
+  "^baikal,.*":
+    description: BAIKAL ELECTRONICS, JSC
   "^bananapi,.*":
     description: BIPAI KEJI LIMITED
   "^bhf,.*":
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 90202e5608d1e..3037e6b3b7b9b 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -29,6 +29,17 @@ config ARCH_ALPINE
 	  This enables support for the Annapurna Labs Alpine
 	  Soc family.
 
+config ARCH_BAIKAL
+	bool "Baikal Electronics SoC family"
+	select DW_APB_TIMER_OF
+	select GPIOLIB
+	select GPIO_DWAPB
+	select MULTIPLEXER
+	select OF_GPIO
+	select PINCTRL
+	help
+	  This enables support for Baikal Electronics SoC family
+
 config ARCH_BCM2835
 	bool "Broadcom BCM2835 family"
 	select TIMER_OF
@@ -294,6 +305,17 @@ config ARCH_ZX
 	help
 	  This enables support for ZTE ZX SoC Family
 
+# The GPIO number here must be sorted by descending number.
+# In a case of multiplatform kernel we want the highest value
+# required by selected platforms
+config ARCH_NR_GPIO
+    int
+    default 0
+    help
+      Maximum number of GPIOs in the system.
+
+      If unsure, leave the default value.
+
 config ARCH_ZYNQMP
 	bool "Xilinx ZynqMP Family"
 	select ZYNQMP_FIRMWARE
diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile
index f19b762c008d8..c9702db89ef52 100644
--- a/arch/arm64/boot/dts/Makefile
+++ b/arch/arm64/boot/dts/Makefile
@@ -7,6 +7,7 @@ subdir-y += amd
 subdir-y += amlogic
 subdir-y += apm
 subdir-y += arm
+subdir-y += baikal
 subdir-y += bitmain
 subdir-y += broadcom
 subdir-y += cavium
diff --git a/arch/arm64/boot/dts/baikal/Makefile b/arch/arm64/boot/dts/baikal/Makefile
new file mode 100644
index 0000000000000..17201158be656
--- /dev/null
+++ b/arch/arm64/boot/dts/baikal/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+dtb-$(CONFIG_ARCH_BAIKAL) += bm1000-dbm10.dtb
+dtb-$(CONFIG_ARCH_BAIKAL) += bm1000-dbm20.dtb
+dtb-$(CONFIG_ARCH_BAIKAL) += bm1000-mbm10.dtb
+dtb-$(CONFIG_ARCH_BAIKAL) += bm1000-mbm20.dtb
+dtb-$(CONFIG_ARCH_BAIKAL) += bm1000-qemu-m.dtb
+dtb-$(CONFIG_ARCH_BAIKAL) += bs1000-dbs.dtb
+dtb-$(CONFIG_ARCH_BAIKAL) += bs1000-dbs-ov.dtb
+dtb-$(CONFIG_ARCH_BAIKAL) += bs1000-qemu-s.dtb
diff --git a/arch/arm64/boot/dts/baikal/bm1000-dbm.dtsi b/arch/arm64/boot/dts/baikal/bm1000-dbm.dtsi
new file mode 100644
index 0000000000000..662ef567109fc
--- /dev/null
+++ b/arch/arm64/boot/dts/baikal/bm1000-dbm.dtsi
@@ -0,0 +1,400 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device tree source for Baikal Electronics DBM boards
+ * Copyright (C) 2019-2023 Baikal Electronics, JSC
+ */
+
+#include <dt-bindings/input/input.h>
+#include "bm1000.dtsi"
+
+/ {
+	aliases {
+		ethernet0	= &gmac0;
+		ethernet1	= &gmac1;
+		mdio-gpio0	= &mdio_gpio;
+	};
+
+	chosen { };
+
+	buttons-backlight {
+		compatible = "gpio-keys";
+		autorepeat;
+		button-brightness-down {
+			label = "Brightness Down Button";
+			linux,code = <KEY_BRIGHTNESSDOWN>;
+			gpios = <&porta 18 GPIO_ACTIVE_LOW>;
+			debounce-interval = <50>;
+		};
+		button-brightness-up {
+			label = "Brightness Up Button";
+			linux,code = <KEY_BRIGHTNESSUP>;
+			gpios = <&porta 17 GPIO_ACTIVE_LOW>;
+			debounce-interval = <50>;
+		};
+		button-brightness-toggle {
+			label = "Brightness Toggle Button";
+			linux,code = <KEY_BRIGHTNESS_TOGGLE>;
+			gpios = <&porta 31 GPIO_ACTIVE_LOW>;
+			debounce-interval = <50>;
+		};
+	};
+
+	panel {
+		/* Change status to "okay" to make use of LVDS LCD panel */
+		status = "disabled";
+		compatible = "panel-lvds";
+		width-mm = <223>;
+		height-mm = <125>;
+		data-mapping = "vesa-24";
+		panel-timing {
+			/* 1920x1080 @ 60 Hz */
+			/* Replace values below with actual panel timings */
+			clock-frequency = <148500000>;
+			hactive = <1920>;
+			vactive = <1080>;
+			hsync-len = <44>;
+			hfront-porch = <88>;
+			hback-porch = <148>;
+			vsync-len = <5>;
+			vfront-porch = <4>;
+			vback-porch = <36>;
+		};
+		port {
+			panel_in: endpoint@0 {
+				remote-endpoint = <&vdu_lvds_out>;
+			};
+		};
+	};
+
+	soc {
+		mdio_gpio: mdio {
+			compatible = "baikal,mdio-gpio";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			mdc-pin = <&porta 28 GPIO_ACTIVE_HIGH>;
+			mdio-pin = <&porta 29 GPIO_ACTIVE_HIGH>;
+			rst-pin = <&porta 30 GPIO_ACTIVE_LOW>;
+			clocks = <&gpio_clk>;
+			clock-names = "gpioclk";
+			status = "okay";
+
+			ethphy2: ethernet-phy@c {
+				compatible = "ethernet-phy-ieee802.3-c45";
+				reg = <0xc>;
+				phy-mode = "xgmii";
+				mv,line-mode = "KR";
+				mv,host-mode = "KX4";
+			};
+
+			ethphy3: ethernet-phy@e {
+				compatible = "ethernet-phy-ieee802.3-c45";
+				reg = <0xe>;
+				phy-mode = "xgmii";
+				mv,line-mode = "KR";
+				mv,host-mode = "KX4";
+			};
+		};
+	};
+
+	sound {
+		compatible = "baikal,snd-soc-baikal";
+		baikal,cpu-dai = <&i2s>;
+		baikal,audio-codec = <&tlv320aic3x>;
+		baikal,dai-name = "tlv320aic3x";
+		baikal,codec-name = "tlv320aic3x-hifi";
+		baikal,stream-name = "tlv320aic3x hifi";
+	};
+
+	sfp-xgmac0 {
+		compatible = "sff,sfp";
+		i2c-bus = <&i2c1>;
+	};
+
+	sfp-xgmac1 {
+		compatible = "sff,sfp";
+		i2c-bus = <&i2c1>;
+	};
+};
+
+&dmac_m2m {
+	status = "okay";
+};
+
+&gmac0 {
+	status = "okay";
+	phy-handle = <&ethphy0>;
+	phy-mode = "rgmii-id";
+};
+
+&gmac1 {
+	status = "okay";
+	phy-handle = <&ethphy1>;
+	phy-mode = "rgmii-id";
+};
+
+&gpio32 {
+	status = "okay";
+};
+
+&hda {
+	status = "okay";
+};
+
+&hdmi {
+	status = "okay";
+};
+
+&i2c1 {
+	status = "okay";
+};
+
+&i2c2 {
+	status = "okay";
+
+	tlv320aic3x: tlv320aic3x@18 {
+		#sound-dai-cells = <0>;
+		compatible = "ti,tlv320aic3x";
+		reg = <0x18>;
+		reset-gpios = <&porta 4 GPIO_ACTIVE_LOW>;
+		status = "okay";
+		ai3x-micbias-vg = <1>;
+		ai3x-ocmv = <1>;
+	};
+};
+
+&i2s {
+	status = "okay";
+	sound-dai = <&tlv320aic3x>;
+	system-clock-frequency = <12000000>;
+};
+
+&mdio0 {
+	ethphy0: ethernet-phy@3 {
+		compatible = "ethernet-phy-ieee802.3-c22";
+		reg = <0x3>;
+		txd0-skew-ps = <0>;
+		txd1-skew-ps = <0>;
+		txd2-skew-ps = <0>;
+		txd3-skew-ps = <0>;
+		txc-skew-ps = <0xff>;
+	};
+};
+
+&mdio1 {
+	ethphy1: ethernet-phy@3 {
+		compatible = "ethernet-phy-ieee802.3-c22";
+		reg = <0x3>;
+		txd0-skew-ps = <0>;
+		txd1-skew-ps = <0>;
+		txd2-skew-ps = <0>;
+		txd3-skew-ps = <0>;
+		txc-skew-ps = <0xff>;
+	};
+};
+
+&mmc {
+	status = "okay";
+#if 0
+	/* eMMC */
+	non-removable;
+	bus-width = <8>;
+	max-clock = <200000000>;
+#else
+	/* SD */
+	disable-wp;
+	bus-width = <4>;
+	max-clock = <25000000>;
+#endif
+};
+
+&pcie0 {
+	status = "okay";
+	bm1000,gen3-eq-fb-mode = <0>;
+	bm1000,phy-rx-ctle = <0xf4>;
+	bm1000,phy-rx-dfe = <0>;
+};
+
+&pcie1 {
+	status = "okay";
+	bm1000,gen3-eq-fb-mode = <0>;
+	bm1000,phy-rx-ctle = <0x64>;
+	bm1000,phy-rx-dfe = <0>;
+};
+
+&pcie2 {
+	status = "okay";
+	bm1000,gen3-eq-fb-mode = <0>;
+	bm1000,phy-rx-ctle = <0x34>;
+	bm1000,phy-rx-dfe = <0>;
+};
+
+&pvt_cluster0 {
+	status = "okay";
+};
+
+&pvt_cluster1 {
+	status = "okay";
+};
+
+&pvt_cluster2 {
+	status = "okay";
+};
+
+&pvt_cluster3 {
+	status = "okay";
+};
+
+&pvt_mali {
+	status = "okay";
+};
+
+&sata0 {
+	status = "okay";
+};
+
+&sata1 {
+	status = "okay";
+};
+
+&smbus1 {
+	status = "okay";
+};
+
+&smbus2 {
+	status = "okay";
+};
+
+&spi0 {
+	num-cs = <4>;
+	cs-gpios = <&porta 24 GPIO_ACTIVE_LOW>, /* SS0 XP8 - DD53 normal flash */
+		   <&porta 25 GPIO_ACTIVE_LOW>, /* SS1 XP9 */
+		   <&porta 26 GPIO_ACTIVE_LOW>, /* SS2 XP10 */
+		   <&porta 27 GPIO_ACTIVE_LOW>; /* SS3 XP11 */
+	status = "okay";
+
+	flash@0 {
+		compatible = "jedec,spi-nor";
+		reg = <0>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		spi-max-frequency = <12500000>;
+		spi-tx-bus-width = <1>;
+		spi-rx-bus-width = <1>;
+		status = "okay";
+
+		partitions {
+			compatible = "fixed-partitions";
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			partition@0 {
+				label = "bl1";
+				reg = <0x000000 0x40000>;
+			};
+
+			partition@40000 {
+				label = "dtb";
+				reg = <0x040000 0x40000>;
+			};
+
+			partition@80000 {
+				label = "uefi-vars";
+				reg = <0x080000 0xc0000>;
+			};
+
+			partition@140000 {
+				label = "fip";
+				reg = <0x140000 0x6c0000>;
+			};
+		};
+	};
+};
+
+&spi1 {
+	num-cs = <8>;
+	cs-gpios = <&porta 0 GPIO_ACTIVE_LOW>;
+	status = "okay";
+
+	espi_test0@0 {
+		compatible = "jedec,spi-nor";
+		reg = <0>; /* CS #0 */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		spi-max-frequency = <10000000>;
+		spi-cpha;
+		spi-tx-bus-width = <1>;
+		spi-rx-bus-width = <1>;
+		status = "okay";
+	};
+};
+
+&timer1 {
+	status = "okay";
+};
+
+&timer2 {
+	status = "okay";
+};
+
+&timer3 {
+	status = "okay";
+};
+
+&timer4 {
+	status = "okay";
+};
+
+&uart1 {
+	status = "okay";
+};
+
+&uart2 {
+	status = "okay";
+};
+
+&usb2 {
+	status = "okay";
+};
+
+&usb3 {
+	status = "okay";
+};
+
+&vdec {
+	status = "okay";
+};
+
+&vdu {
+	/*
+	 * 'lvds-lanes' property is mandatory to enable LVDS. Valid values are 1, 2 or 4.
+	 * It is also mandatory to provide actual values for 'enable-gpios' property.
+	 * Also make sure that panel's 'status' (see above) is "okay".
+	 */
+	status = "okay";
+	enable-gpios = <&porta 16 GPIO_ACTIVE_LOW>;
+	lvds-lanes = <2>;
+	backlight {
+		min-brightness-level = <10>;
+		default-brightness-level = <60>;
+		brightness-level-step = <2>;
+		pwm-frequency = <20000>;
+	};
+	ports {
+		port@1 {
+			reg = <1>;
+			vdu_lvds_out: endpoint@0 {
+				remote-endpoint = <&panel_in>;
+			};
+		};
+	};
+};
+
+&xgmac0 {
+	status = "okay";
+	ext-phy-handle = <&ethphy2>;
+};
+
+&xgmac1 {
+	status = "okay";
+	ext-phy-handle = <&ethphy3>;
+};
diff --git a/arch/arm64/boot/dts/baikal/bm1000-dbm10.dts b/arch/arm64/boot/dts/baikal/bm1000-dbm10.dts
new file mode 100644
index 0000000000000..e5751636f48f0
--- /dev/null
+++ b/arch/arm64/boot/dts/baikal/bm1000-dbm10.dts
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device tree source for Baikal Electronics DBM 1.0 board
+ * Copyright (C) 2022 Baikal Electronics, JSC
+ */
+
+/dts-v1/;
+
+#include "bm1000-dbm.dtsi"
+
+/ {
+	model = "Baikal Electronics DBM 1.0";
+	compatible = "baikal,dbm10", "baikal,bm1000";
+};
+
+&i2c1 {
+	rtc@56 {
+		compatible = "abracon,abeoz9";
+		reg = <0x56>;
+	};
+};
diff --git a/arch/arm64/boot/dts/baikal/bm1000-dbm20.dts b/arch/arm64/boot/dts/baikal/bm1000-dbm20.dts
new file mode 100644
index 0000000000000..12bcd1bd67ec1
--- /dev/null
+++ b/arch/arm64/boot/dts/baikal/bm1000-dbm20.dts
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device tree source for Baikal Electronics DBM 2.0 board
+ * Copyright (C) 2022 Baikal Electronics, JSC
+ */
+
+/dts-v1/;
+
+#include "bm1000-dbm.dtsi"
+
+/ {
+	model = "Baikal Electronics DBM 2.0";
+	compatible = "baikal,dbm20", "baikal,bm1000";
+};
+
+&i2c2 {
+	rtc@56 {
+		compatible = "abracon,abeoz9";
+		reg = <0x56>;
+	};
+};
diff --git a/arch/arm64/boot/dts/baikal/bm1000-mbm.dtsi b/arch/arm64/boot/dts/baikal/bm1000-mbm.dtsi
new file mode 100644
index 0000000000000..9f72a218f35a8
--- /dev/null
+++ b/arch/arm64/boot/dts/baikal/bm1000-mbm.dtsi
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device tree include file for MBM-compatible boards
+ * Copyright (C) 2021-2023 Baikal Electronics, JSC
+ */
+
+#include "bm1000.dtsi"
+
+/ {
+	aliases {
+		ethernet0 = &gmac0;
+		ethernet1 = &gmac1;
+	};
+
+	chosen { };
+
+	leds {
+		compatible = "gpio-leds";
+		led0 {
+			gpios = <&porta 8 GPIO_ACTIVE_HIGH>;
+			default-state = "keep";
+		};
+	};
+
+	sound {
+		compatible = "simple-audio-card";
+		simple-audio-card,name = "MITX-Sound-Card";
+		simple-audio-card,bitclock-master = <&codec0>;
+		simple-audio-card,frame-master = <&codec0>;
+		simple-audio-card,widgets =
+			"Microphone", "Mic Jack",
+			"Headphone", "Headphones",
+			"Speaker", "AUX Out",
+			"Line", "Line In";
+		simple-audio-card,routing =
+			"Headphones", "RHP",
+			"Headphones", "LHP",
+			"AUX Out", "AUXOUT1",
+			"AUX Out", "AUXOUT2",
+			"L2", "Mic Jack",
+			"R2", "Mic Jack",
+			"LAUX", "Line In",
+			"RAUX", "Line In";
+		simple-audio-card,mic-det-gpio = <&porta 26 GPIO_ACTIVE_LOW>;
+		simple-audio-card,format = "i2s";
+		simple-audio-card,cpu {
+			sound-dai = <&i2s>;
+		};
+		codec0: simple-audio-card,codec {
+			sound-dai = <&nau8822 0>;
+		};
+	};
+};
+
+&dmac_m2m {
+	status = "okay";
+};
+
+&gmac0 {
+	status = "okay";
+	phy-handle = <&ethphy0>;
+	phy-mode = "rgmii-id";
+};
+
+&gmac1 {
+	status = "okay";
+	phy-handle = <&ethphy1>;
+	phy-mode = "rgmii-id";
+};
+
+&gpio32 {
+	status = "okay";
+};
+
+&hdmi {
+	status = "okay";
+};
+
+&i2c1 {
+	status = "okay";
+
+	bmc@8 {
+		compatible = "tp,mitx2-bmc";
+		reg = <0x08>;
+	};
+
+	nau8822: nau8822@1a {
+		compatible = "nuvoton,nau8822";
+		#sound-dai-cells = <1>;
+		reg = <0x1a>;
+	};
+
+	gpio@50 {
+		compatible = "nxp,pca9670";
+		#gpio-cells = <2>;
+		gpio-controller;
+		reg = <0x50>;
+	};
+
+	rtc@51 {
+		compatible = "nxp,pcf2129", "nxp,pcf2127";
+		reg = <0x51>;
+	};
+
+	/* FRU */
+	eeprom@53 {
+		compatible = "atmel,24c32";
+		pagesize = <32>;
+		reg = <0x53>;
+	};
+};
+
+&i2c2 {
+	status = "okay";
+};
+
+&i2s {
+	status = "okay";
+	system-clock-frequency = <12000000>;
+	#sound-dai-cells = <0>;
+};
+
+&mdio0 {
+	ethphy0: ethernet-phy@3 {
+		compatible = "ethernet-phy-ieee802.3-c22";
+		reg = <0x3>;
+		txd0-skew-ps = <0>;
+		txd1-skew-ps = <0>;
+		txd2-skew-ps = <0>;
+		txd3-skew-ps = <0>;
+		txc-skew-ps = <0xff>;
+	};
+};
+
+&mdio1 {
+	ethphy1: ethernet-phy@3 {
+		compatible = "ethernet-phy-ieee802.3-c22";
+		reg = <0x3>;
+		txd0-skew-ps = <0>;
+		txd1-skew-ps = <0>;
+		txd2-skew-ps = <0>;
+		txd3-skew-ps = <0>;
+		txc-skew-ps = <0xff>;
+	};
+};
+
+&mmc {
+	status = "okay";
+	disable-wp;
+	bus-width = <4>;
+	max-clock = <25000000>;
+};
+
+&pcie0 {
+	status = "okay";
+	reset-gpios = <&porta 6 GPIO_ACTIVE_LOW>;
+	bm1000,gen3-eq-fb-mode = <0>;
+	bm1000,phy-rx-ctle = <0x34>;
+	bm1000,phy-rx-dfe = <0>;
+};
+
+&pcie2 {
+	status = "okay";
+	reset-gpios = <&porta 3 GPIO_ACTIVE_LOW>;
+	bm1000,gen3-eq-fb-mode = <0>;
+	bm1000,phy-rx-ctle = <0x34>;
+	bm1000,phy-rx-dfe = <0>;
+};
+
+&porta {
+	pcieclk {
+		gpio-hog;
+		gpios = <1 GPIO_ACTIVE_LOW>;
+		output-high;
+		line-name = "pcie-x8-clock";
+	};
+};
+
+&pvt_cluster0 {
+	status = "okay";
+};
+
+&pvt_cluster1 {
+	status = "okay";
+};
+
+&pvt_cluster2 {
+	status = "okay";
+};
+
+&pvt_cluster3 {
+	status = "okay";
+};
+
+&pvt_mali {
+	status = "okay";
+};
+
+&sata0 {
+	status = "okay";
+};
+
+&sata1 {
+	status = "okay";
+};
+
+&smbus1 {
+	status = "okay";
+};
+
+&smbus2 {
+	status = "okay";
+};
+
+&spi0 {
+	num-cs = <4>;
+	cs-gpios = <0>;
+	status = "okay";
+};
+
+&timer1 {
+	status = "okay";
+};
+
+&timer2 {
+	status = "okay";
+};
+
+&timer3 {
+	status = "okay";
+};
+
+&timer4 {
+	status = "okay";
+};
+
+&uart1 {
+	status = "okay";
+};
+
+&uart2 {
+	status = "okay";
+};
+
+&usb2 {
+	status = "okay";
+};
+
+&usb3 {
+	status = "okay";
+};
+
+&vdec {
+	status = "okay";
+};
+
+&vdu {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/baikal/bm1000-mbm10.dts b/arch/arm64/boot/dts/baikal/bm1000-mbm10.dts
new file mode 100644
index 0000000000000..7dd80e5a24f6e
--- /dev/null
+++ b/arch/arm64/boot/dts/baikal/bm1000-mbm10.dts
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device tree source for MBM 1.0 compatible boards:
+ *   - TP-TF307-MB-A0 Rev.1.0 (BM1BM1-A)
+ *   - TF307-MB-S-C Rev.3.0
+ *
+ * Copyright (C) 2021-2022 Baikal Electronics, JSC
+ */
+
+/dts-v1/;
+
+#include "bm1000-mbm.dtsi"
+
+/ {
+	model = "Baikal Electronics MBM 1.0";
+	compatible = "baikal,mbm10", "baikal,bm1000";
+
+	sound {
+		simple-audio-card,hp-det-gpio = <&porta 27 GPIO_ACTIVE_HIGH>;
+	};
+};
diff --git a/arch/arm64/boot/dts/baikal/bm1000-mbm20.dts b/arch/arm64/boot/dts/baikal/bm1000-mbm20.dts
new file mode 100644
index 0000000000000..23ed4e333e801
--- /dev/null
+++ b/arch/arm64/boot/dts/baikal/bm1000-mbm20.dts
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device tree source for MBM 2.0 compatible boards:
+ *   - TF307-MB-S-D Rev.4.0 (BM1BM1-D)
+ *
+ * Copyright (C) 2021-2023 Baikal Electronics, JSC
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/input/input.h>
+#include "bm1000-mbm.dtsi"
+
+/ {
+	model = "Baikal Electronics MBM 2.0";
+	compatible = "baikal,mbm20", "baikal,bm1000";
+
+	buttons-backlight {
+		compatible = "gpio-keys";
+		autorepeat;
+		button-brightness-down {
+			label = "Brightness Down Button";
+			linux,code = <KEY_BRIGHTNESSDOWN>;
+			gpios = <&porta 18 GPIO_ACTIVE_LOW>;
+			debounce-interval = <50>;
+		};
+		button-brightness-up {
+			label = "Brightness Up Button";
+			linux,code = <KEY_BRIGHTNESSUP>;
+			gpios = <&porta 17 GPIO_ACTIVE_LOW>;
+			debounce-interval = <50>;
+		};
+		button-brightness-toggle {
+			label = "Brightness Toggle Button";
+			linux,code = <KEY_BRIGHTNESS_TOGGLE>;
+			gpios = <&porta 31 GPIO_ACTIVE_LOW>;
+			debounce-interval = <50>;
+		};
+	};
+
+	panel {
+		/*
+		 * In order to utilize LVDS LCD panel, make sure that
+		 * 'status' is "okay" along with &vdu 'status' (see below).
+		 */
+		status = "disabled";
+		compatible = "panel-lvds";
+		width-mm = <223>;
+		height-mm = <125>;
+		data-mapping = "vesa-24";
+		panel-timing {
+			/* 1920x1080 @ 60 Hz */
+			/* Replace values below with actual panel timings */
+			clock-frequency = <148500000>;
+			hactive = <1920>;
+			vactive = <1080>;
+			hsync-len = <44>;
+			hfront-porch = <88>;
+			hback-porch = <148>;
+			vsync-len = <5>;
+			vfront-porch = <4>;
+			vback-porch = <36>;
+		};
+		port {
+			panel_in: endpoint@0 {
+				remote-endpoint = <&vdu_lvds_out>;
+			};
+		};
+	};
+
+	sound {
+		simple-audio-card,hp-det-gpio = <&porta 29 GPIO_ACTIVE_HIGH>;
+	};
+};
+
+&gmac0 {
+	snps,reset-gpios = <&porta 19 GPIO_ACTIVE_LOW>;
+	snps,reset-delays-us = <0 10000 50000>;
+};
+
+&gmac1 {
+	snps,reset-gpios = <&porta 20 GPIO_ACTIVE_LOW>;
+	snps,reset-delays-us = <0 10000 50000>;
+};
+
+&vdu {
+	/*
+	 * 'lvds-lanes' property is mandatory to enable LVDS. Valid values are 1, 2 or 4.
+	 * It is also mandatory to provide actual values for 'enable-gpios' property.
+	 * Also make sure that panel's 'status' (see above) is "okay".
+	 */
+	enable-gpios = <&porta 16 GPIO_ACTIVE_LOW>;
+	lvds-lanes = <2>;
+	backlight {
+		min-brightness-level = <10>;
+		default-brightness-level = <60>;
+		brightness-level-step = <2>;
+		pwm-frequency = <20000>;
+	};
+	ports {
+		port@1 {
+			reg = <1>;
+			vdu_lvds_out: endpoint@0 {
+				remote-endpoint = <&panel_in>;
+			};
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/baikal/bm1000-qemu-m.dts b/arch/arm64/boot/dts/baikal/bm1000-qemu-m.dts
new file mode 100644
index 0000000000000..07734bc78c208
--- /dev/null
+++ b/arch/arm64/boot/dts/baikal/bm1000-qemu-m.dts
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device tree source for Baikal Electronics QEMU-M virtual platform
+ * Copyright (C) 2018-2023 Baikal Electronics, JSC
+ */
+
+/dts-v1/;
+
+#include "bm1000.dtsi"
+
+/ {
+	model = "Baikal Electronics QEMU-M";
+	compatible = "baikal,qemu-m", "baikal,bm1000";
+
+	aliases {
+		ethernet0 = &gmac0;
+		ethernet1 = &gmac1;
+	};
+
+	chosen { };
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0x00000000 0x80000000 0x0 0x80000000>; /* 2 GiB */
+	};
+
+	/*
+	 * Device is necessary for UEFI to boot on QEMU,
+	 * need to replace it with something later.
+	 */
+	flash@0 {
+		compatible = "cfi-flash";
+		reg = <0x0 0x4000000 0x0 0x4000000>;
+		bank-width = <0x4>;
+	};
+
+	panel: panel {
+		compatible = "auo,b133htn01";		/* 1920x1080 */
+		port {
+			lcd_panel: endpoint {
+				remote-endpoint = <&vdu_pads>;
+			};
+		};
+	};
+};
+
+&gmac0 {
+	status = "okay";
+	phy-handle = <&ethphy0>;
+	phy-mode = "rgmii-id";
+};
+
+&gmac1 {
+	status = "okay";
+	phy-handle = <&ethphy1>;
+	phy-mode = "rgmii-id";
+};
+
+&mdio0 {
+	ethphy0: ethernet-phy@3 {
+		compatible = "ethernet-phy-ieee802.3-c22";
+		reg = <0x3>;
+	};
+};
+
+&mdio1 {
+	ethphy1: ethernet-phy@3 {
+		compatible = "ethernet-phy-ieee802.3-c22";
+		reg = <0x3>;
+	};
+};
+
+&mmc {
+	status = "okay";
+	disable-wp;
+	bus-width = <4>;
+	max-clock = <25000000>;
+};
+
+&sata0 {
+	status = "okay";
+};
+
+&sata1 {
+	status = "okay";
+};
+
+&spi0 {
+	num-cs = <6>;
+	status = "okay";
+
+	flash@0 {
+		compatible = "jedec,spi-nor";
+		reg = <0>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		spi-max-frequency = <12500000>;
+		spi-tx-bus-width = <1>;
+		spi-rx-bus-width = <1>;
+		status = "okay";
+
+		partitions {
+			compatible = "fixed-partitions";
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			partition@0 {
+				label = "bl1";
+				reg = <0x000000 0x40000>;
+			};
+
+			partition@40000 {
+				label = "dtb";
+				reg = <0x040000 0x40000>;
+			};
+
+			partition@80000 {
+				label = "uefi-vars";
+				reg = <0x080000 0xc0000>;
+			};
+
+			partition@140000 {
+				label = "fip";
+				reg = <0x140000 0x6c0000>;
+			};
+		};
+	};
+};
+
+&uart1 {
+	status = "okay";
+	clocks = <&avlsp_cmu0 1>, <&apb_clk>, <&apb_clk>;
+	clock-names = "soc_uartclk", "apb_pclk", "baudclk";
+};
+
+&vdu {
+	port {
+		vdu_pads: endpoint {
+			remote-endpoint = <&lcd_panel>;
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/baikal/bm1000.dtsi b/arch/arm64/boot/dts/baikal/bm1000.dtsi
new file mode 100644
index 0000000000000..4cb7285f4bc44
--- /dev/null
+++ b/arch/arm64/boot/dts/baikal/bm1000.dtsi
@@ -0,0 +1,1391 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device tree include file for BE-M1000 SoC
+ * Copyright (C) 2017-2023 Baikal Electronics, JSC
+ */
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+#define CLUSTER_OPP_TABLE(idx)					\
+	cluster##idx##_opp: opp-table-cluster##idx {		\
+		compatible = "operating-points-v2";		\
+		opp-shared;					\
+								\
+		opp-500000000 {					\
+			opp-hz = /bits/ 64 <500000000>;		\
+			clock-latency-ns = <10000000>;		\
+		};						\
+		opp-600000000 {					\
+			opp-hz = /bits/ 64 <600000000>;		\
+			clock-latency-ns = <10000000>;		\
+		};						\
+		opp-700000000 {					\
+			opp-hz = /bits/ 64 <700000000>;		\
+			clock-latency-ns = <10000000>;		\
+		};						\
+		opp-800000000 {					\
+			opp-hz = /bits/ 64 <800000000>;		\
+			clock-latency-ns = <10000000>;		\
+		};						\
+		opp-900000000 {					\
+			opp-hz = /bits/ 64 <900000000>;		\
+			clock-latency-ns = <10000000>;		\
+		};						\
+		opp-1000000000 {				\
+			opp-hz = /bits/ 64 <1000000000>;	\
+			clock-latency-ns = <10000000>;		\
+		};						\
+		opp-1100000000 {				\
+			opp-hz = /bits/ 64 <1100000000>;	\
+			clock-latency-ns = <10000000>;		\
+		};						\
+		opp-1200000000 {				\
+			opp-hz = /bits/ 64 <1200000000>;	\
+			clock-latency-ns = <10000000>;		\
+		};						\
+		opp-1300000000 {				\
+			opp-hz = /bits/ 64 <1300000000>;	\
+			clock-latency-ns = <10000000>;		\
+		};						\
+		opp-1400000000 {				\
+			opp-hz = /bits/ 64 <1400000000>;	\
+			clock-latency-ns = <10000000>;		\
+		};						\
+		opp-1500000000 {				\
+			opp-hz = /bits/ 64 <1500000000>;	\
+			clock-latency-ns = <10000000>;		\
+		};						\
+	}
+
+/ {
+	compatible = "baikal,bm1000";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&gic>;
+
+	aliases {
+		i2c1	= &i2c1;
+		i2c2	= &i2c2;
+		i2c3	= &smbus1;
+		i2c4	= &smbus2;
+		serial0	= &uart1;
+		serial1	= &uart2;
+	};
+
+	clocks {
+		osc25: oscillator25 { /* external oscillator */
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <25000000>;
+			clock-output-names = "osc25";
+		};
+		osc27: oscillator27 { /* external oscillator */
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <27000000>;
+			clock-output-names = "osc27";
+		};
+		cpu_clk: cpu_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <1500000000>;
+			clock-output-names = "cpuclk";
+		};
+		apb_clk: apb_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <200000000>;
+			clock-output-names = "apb_pclk";
+		};
+		uart_clk: uart_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <7372800>;
+			clock-output-names = "soc_uartclk";
+		};
+		i2c_clk: i2c_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <100000000>;
+			clock-output-names = "soc_i2cclk";
+		};
+		smbus_clk: smbus_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <50000000>;
+			clock-output-names = "soc_smbusclk";
+		};
+		timer1_clk: timer1_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <50000000>;
+			clock-output-names = "soc_timer1clk";
+		};
+		timer2_clk: timer2_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <50000000>;
+			clock-output-names = "soc_timer2clk";
+		};
+		timer3_clk: timer3_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <50000000>;
+			clock-output-names = "soc_timer3clk";
+		};
+		timer4_clk: timer4_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <50000000>;
+			clock-output-names = "soc_timer4clk";
+		};
+		gpio_clk: gpio_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <1000000>;
+			clock-output-names = "soc_gpioclk";
+		};
+		spi_clk: spi_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <50000000>;
+			clock-output-names = "soc_spiclk";
+		};
+		soc_ethclk: ethclk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <50000000>;
+			clock-output-names = "eth_clk";
+		};
+		soc_xgbeclk: xgbeclk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <156250000>;
+			clock-output-names = "xgbe_clk";
+		};
+		soc_smc50mhz: clk50mhz {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <50000000>;
+			clock-output-names = "smc_clk";
+		};
+		soc_faxiclk: refclk400mhz {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <400000000>;
+			clock-output-names = "faxi_clk";
+		};
+		soc_tmp_clk: refclkXXXmhz {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <100000000>;
+			clock-output-names = "tmpclk";
+		};
+		gpu_clk: gpu_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <750000000>;
+			clock-output-names = "gpuclk";
+		};
+		clk_ahb: clk_ahb {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <100000000>;
+			clock-output-names = "clk_ahb";
+		};
+		clk_xin: clk_xin {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <100000000>;
+			clock-output-names = "clk_xin";
+		};
+		avlsp_cmu1_div7: avlsp_cmu1_div7 {
+			compatible = "allwinner,sun4i-a10-pll3-2x-clk", "fixed-factor-clock";
+			#clock-cells = <0>;
+			clocks = <&avlsp_cmu1>;
+			clock-div = <7>;
+			clock-mult = <1>;
+			clock-output-names = "lvds_clk";
+		};
+	};
+
+	psci {
+		compatible = "arm,psci-1.0", "arm,psci-0.2";
+		method = "smc";
+	};
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		/* Do not use 'cpu-map'. It leads to wrong topology. */
+
+		cpu0: cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57";
+			reg = <0x0 0x0>;
+			enable-method = "psci";
+			i-cache-size = <0xc000>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <256>;
+			d-cache-size = <0x8000>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&cluster0_l2>;
+			clocks = <&cluster0_cmu0>;
+			operating-points-v2 = <&cluster0_opp>;
+			cpu-idle-states = <&CPU_SLEEP>;
+		};
+
+		cpu1: cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57";
+			reg = <0x0 0x1>;
+			enable-method = "psci";
+			i-cache-size = <0xc000>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <256>;
+			d-cache-size = <0x8000>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&cluster0_l2>;
+			clocks = <&cluster0_cmu0>;
+			operating-points-v2 = <&cluster0_opp>;
+			cpu-idle-states = <&CPU_SLEEP>;
+		};
+
+		cpu2: cpu@100 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57";
+			reg = <0x0 0x100>;
+			enable-method = "psci";
+			i-cache-size = <0xc000>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <256>;
+			d-cache-size = <0x8000>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&cluster1_l2>;
+			clocks = <&cluster1_cmu0>;
+			operating-points-v2 = <&cluster1_opp>;
+			cpu-idle-states = <&CPU_SLEEP>;
+		};
+
+		cpu3: cpu@101 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57";
+			reg = <0x0 0x101>;
+			enable-method = "psci";
+			i-cache-size = <0xc000>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <256>;
+			d-cache-size = <0x8000>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&cluster1_l2>;
+			clocks = <&cluster1_cmu0>;
+			operating-points-v2 = <&cluster1_opp>;
+			cpu-idle-states = <&CPU_SLEEP>;
+		};
+
+		cpu4: cpu@200 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57";
+			reg = <0x0 0x200>;
+			enable-method = "psci";
+			i-cache-size = <0xc000>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <256>;
+			d-cache-size = <0x8000>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&cluster2_l2>;
+			clocks = <&cluster2_cmu0>;
+			operating-points-v2 = <&cluster2_opp>;
+			cpu-idle-states = <&CPU_SLEEP>;
+		};
+
+		cpu5: cpu@201 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57";
+			reg = <0x0 0x201>;
+			enable-method = "psci";
+			i-cache-size = <0xc000>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <256>;
+			d-cache-size = <0x8000>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&cluster2_l2>;
+			clocks = <&cluster2_cmu0>;
+			operating-points-v2 = <&cluster2_opp>;
+			cpu-idle-states = <&CPU_SLEEP>;
+		};
+
+		cpu6: cpu@300 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57";
+			reg = <0x0 0x300>;
+			enable-method = "psci";
+			i-cache-size = <0xc000>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <256>;
+			d-cache-size = <0x8000>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&cluster3_l2>;
+			clocks = <&cluster3_cmu0>;
+			operating-points-v2 = <&cluster3_opp>;
+			cpu-idle-states = <&CPU_SLEEP>;
+		};
+
+		cpu7: cpu@301 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a57";
+			reg = <0x0 0x301>;
+			enable-method = "psci";
+			i-cache-size = <0xc000>;
+			i-cache-line-size = <64>;
+			i-cache-sets = <256>;
+			d-cache-size = <0x8000>;
+			d-cache-line-size = <64>;
+			d-cache-sets = <256>;
+			next-level-cache = <&cluster3_l2>;
+			clocks = <&cluster3_cmu0>;
+			operating-points-v2 = <&cluster3_opp>;
+			cpu-idle-states = <&CPU_SLEEP>;
+		};
+
+		idle-states {
+			entry-method = "psci";
+
+			CPU_SLEEP: cpu-sleep {
+				compatible = "arm,idle-state";
+				local-timer-stop;
+				arm,psci-suspend-param = <0x0000001>;
+				entry-latency-us = <300>;
+				exit-latency-us = <1200>;
+				min-residency-us = <2000>;
+			};
+		};
+
+		cluster0_l2: l2-cache0 {
+			compatible = "cache";
+			cache-size = <0x100000>;
+			cache-line-size = <64>;
+			cache-sets = <1024>;
+			cache-unified;
+			cache-level = <2>;
+			next-level-cache = <&l3>;
+		};
+
+		cluster1_l2: l2-cache1 {
+			compatible = "cache";
+			cache-size = <0x100000>;
+			cache-line-size = <64>;
+			cache-sets = <1024>;
+			cache-unified;
+			cache-level = <2>;
+			next-level-cache = <&l3>;
+		};
+
+		cluster2_l2: l2-cache2 {
+			compatible = "cache";
+			cache-size = <0x100000>;
+			cache-line-size = <64>;
+			cache-sets = <1024>;
+			cache-unified;
+			cache-level = <2>;
+			next-level-cache = <&l3>;
+		};
+
+		cluster3_l2: l2-cache3 {
+			compatible = "cache";
+			cache-size = <0x100000>;
+			cache-line-size = <64>;
+			cache-sets = <1024>;
+			cache-unified;
+			cache-level = <2>;
+			next-level-cache = <&l3>;
+		};
+
+		l3: l3-cache {
+			cache-size = <0x800000>;
+			cache-unified;
+			cache-level = <3>;
+		};
+	};
+
+	CLUSTER_OPP_TABLE(0);
+	CLUSTER_OPP_TABLE(1);
+	CLUSTER_OPP_TABLE(2);
+	CLUSTER_OPP_TABLE(3);
+
+	pmu {
+		compatible = "arm,cortex-a57-pmu";
+		interrupts = <GIC_PPI 7 IRQ_TYPE_LEVEL_LOW>;
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_PPI 14 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_PPI 11 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_PPI 10 IRQ_TYPE_LEVEL_LOW>;
+	};
+
+	soc {
+		compatible = "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		cluster0_cmu0: clock-controller@28000000 {
+			compatible = "baikal,bm1000-cmu";
+			reg = <0x0 0x28000000 0x0 0x10000>;
+			clock-output-names = "bm1000-cluster0-cmu0";
+			#clock-cells = <0>;
+			clocks = <&osc25>;
+			clock-frequency = <1500000000>;
+			min = <500000000>;
+			max = <1500000000>;
+		};
+
+		cluster1_cmu0: clock-controller@c000000 {
+			compatible = "baikal,bm1000-cmu";
+			reg = <0x0 0xc000000 0x0 0x10000>;
+			clock-output-names = "bm1000-cluster1-cmu0";
+			#clock-cells = <0>;
+			clocks = <&osc25>;
+			clock-frequency = <1500000000>;
+			min = <500000000>;
+			max = <1500000000>;
+		};
+
+		cluster2_cmu0: clock-controller@a000000 {
+			compatible = "baikal,bm1000-cmu";
+			reg = <0x0 0xa000000 0x0 0x10000>;
+			clock-output-names = "bm1000-cluster2-cmu0";
+			#clock-cells = <0>;
+			clocks = <&osc25>;
+			clock-frequency = <1500000000>;
+			min = <500000000>;
+			max = <1500000000>;
+		};
+
+		cluster3_cmu0: clock-controller@26000000 {
+			compatible = "baikal,bm1000-cmu";
+			reg = <0x0 0x26000000 0x0 0x10000>;
+			clock-output-names = "bm1000-cluster3-cmu0";
+			#clock-cells = <0>;
+			clocks = <&osc25>;
+			clock-frequency = <1500000000>;
+			min = <500000000>;
+			max = <1500000000>;
+		};
+
+		avlsp_cmu0: clock-controller@20000000 {
+			compatible = "baikal,bm1000-cmu";
+			reg = <0x0 0x20000000 0x0 0x10000>;
+			clock-output-names = "bm1000-avlsp-cmu0";
+			#clock-cells = <1>;
+			clocks = <&osc25>;
+			clock-frequency = <1200000000>;
+			min = <800000>;
+			max = <2100000000>;
+			clock-names = "gpio",
+				      "uart1",
+				      "uart2",
+				      "apb",
+				      "spi",
+				      "espi",
+				      "i2c1",
+				      "i2c2",
+				      "timer1",
+				      "timer2",
+				      "timer3",
+				      "timer4",
+				      "dmac",
+				      "smbus1",
+				      "smbus2",
+				      "hda_sys_clk",
+				      "hda_clk48",
+				      "mshc_axi",
+				      "mshc_ahb",
+				      "mshc_tx_x2",
+				      "mshc_b",
+				      "mshc_tm",
+				      "mshc_cqetm",
+				      "hwa_clu",
+				      "hwa_clu_hf",
+				      "hwa_axi",
+				      "vdu_axi",
+				      "smmu";
+			clock-indices =	<0>, <1>, <2>, <3>, <4>, <5>, <6>,
+					<7>, <8>, <9>, <10>, <11>, <12>, <13>,
+					<14>, <15>, <16>, <17>, <18>, <19>, <20>,
+					<21>, <22>, <23>, <24>, <25>, <26>, <27>;
+		};
+
+		avlsp_cmu1: clock-controller@20010000 {
+			compatible = "baikal,bm1000-cmu";
+			reg = <0x0 0x20010000 0x0 0x10000>;
+			clock-output-names = "bm1000-avlsp-cmu1";
+			#clock-cells = <0>;
+			clocks = <&osc27>;
+			clock-frequency = <1039500000>;
+			min = <13500000>;
+			max = <2100000000>;
+		};
+
+		mali_cmu0: clock-controller@2a000000 {
+			compatible = "baikal,bm1000-cmu";
+			reg = <0x0 0x2a000000 0x0 0x10000>;
+			clock-output-names = "bm1000-mali-cmu0";
+			#clock-cells = <0>;
+			clocks = <&osc25>;
+			clock-frequency = <750000000>;
+			min = <400000000>;
+			max = <800000000>;
+			clock-names = "aclk";
+		};
+
+		usb_cmu0: clock-controller@2c000000 {
+			compatible = "baikal,bm1000-cmu";
+			reg = <0x0 0x2c000000 0x0 0x10000>;
+			clock-output-names = "bm1000-usb-cmu0";
+			#clock-cells = <1>;
+			clocks = <&osc25>;
+			clock-frequency = <800000000>;
+			min = <100000000>;
+			max = <800000000>;
+			clock-names = "sata_ref_alt_clk",
+				      "sata_aclk_u0",
+				      "sata_aclk_u1",
+				      "usb2_phy0_ref_clk",
+				      "usb2_phy1_ref_clk",
+				      "usb2_aclk",
+				      "usb2_clk_sofitp",
+				      "usb3_phy0_ref_clk",
+				      "usb3_phy1_ref_clk",
+				      "usb3_phy2_ref_clk",
+				      "usb3_phy3_ref_clk",
+				      "usb3_aclk",
+				      "usb3_clk_sofitp",
+				      "usb3_clk_suspend",
+				      "smmu_aclk",
+				      "dmac_aclk",
+				      "gic_aclk";
+			clock-indices =	<0>, <1>, <2>, <3>, <4>, <5>,
+					<6>, <7>, <8>, <9>, <10>, <11>,
+					<12>, <13>, <14>, <15>, <16>;
+		};
+
+		xgbe_cmu0: clock-controller@30000000 {
+			compatible = "baikal,bm1000-cmu";
+			reg = <0x0 0x30000000 0x0 0x10000>;
+			clock-output-names = "bm1000-xgbe-cmu0";
+			#clock-cells = <1>;
+			clocks = <&osc25>;
+			clock-frequency = <1250000000>;
+			min = <50000000>;
+			max = <1250000000>;
+			clock-names = "csr50mhz",
+				      "gmac0_tx2",
+				      "gmac1_tx2",
+				      "hdmi_aclk",
+				      "isfr";
+			clock-indices = <0>, <10>, <13>, <15>, <17>;
+		};
+
+		xgbe_cmu1: clock-controller@30010000 {
+			compatible = "baikal,bm1000-cmu";
+			reg = <0x0 0x30010000 0x0 0x10000>;
+			clock-output-names = "bm1000-xgbe-cmu1";
+			#clock-cells = <1>;
+			clocks = <&osc27>;
+			clock-frequency = <25250000>;
+			min = <13500000>;
+			max = <600000000>;
+			clock-names = "pixelclk";
+			clock-indices = <0>;
+		};
+
+		pcie_gpr: syscon@2050000 {
+			compatible = "baikal,bm1000-pcie-gpr", "syscon";
+			reg = <0x0 0x2050000 0x0 0x100>;
+		};
+
+		pcie0: pcie@2200000 { /* PCIe x4 #0 */
+			device_type = "pci";
+			compatible = "baikal,bm1000-pcie";
+			reg = <0x00000000 0x02200000 0x0 0x1000>,
+			      <0x00000040 0x00000000 0x0 0x10000000>;
+			reg-names = "dbi", "config";
+			bus-range = <0x0 0xff>;
+			interrupts = <GIC_SPI 426 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 427 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 429 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 430 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 431 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 432 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 433 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 434 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 435 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 436 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 437 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "aer", "pme", "msi",
+					  "msi0", "msi1", "msi2", "msi3",
+					  "msi4", "msi5", "msi6", "msi7";
+			#interrupt-cells = <1>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			ranges = <0x81000000 0x0 0x00000000 0x0 0x4ff00000 0x0 0x100000>,
+				 <0x82000000 0x0 0x40100000 0x0 0x40100000 0x0 0xfe00000>;
+			num-lanes = <4>;
+			num-viewport = <4>;
+			msi-parent = <&its>;
+			msi-map = <0x0 &its 0x0 0x10000>;
+			status = "disabled";
+		};
+
+		pcie1: pcie@2210000 { /* PCIe x4 #1 */
+			device_type = "pci";
+			compatible = "baikal,bm1000-pcie";
+			reg = <0x00000000 0x02210000 0x0 0x1000>,
+			      <0x00000050 0x00000000 0x0 0x10000000>;
+			reg-names = "dbi", "config";
+			bus-range = <0x0 0xff>;
+			interrupts = <GIC_SPI 402 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 403 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 405 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 406 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 407 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 408 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 409 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 410 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 411 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 412 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 413 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "aer", "pme", "msi",
+					  "msi0", "msi1", "msi2", "msi3",
+					  "msi4", "msi5", "msi6", "msi7";
+			#interrupt-cells = <1>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			ranges = <0x81000000 0x0 0x00100000 0x0 0x5ff00000 0x0 0x100000>,
+				 <0x82000000 0x0 0x50000000 0x0 0x50000000 0x0 0xff00000>;
+			num-lanes = <4>;
+			num-viewport = <4>;
+			msi-parent = <&its>;
+			msi-map = <0x0 &its 0x10000 0x10000>;
+			status = "disabled";
+		};
+
+		pcie2: pcie@2220000 { /* PCIe x8 */
+			device_type = "pci";
+			compatible = "baikal,bm1000-pcie";
+			reg = <0x00000000 0x02220000 0x0 0x1000>,
+			      <0x00000060 0x00000000 0x0 0x10000000>;
+			reg-names = "dbi", "config";
+			bus-range = <0x0 0xff>;
+			interrupts = <GIC_SPI 378 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 379 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 381 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 382 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 383 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 384 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 385 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 386 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 387 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 388 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 389 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "aer", "pme", "msi",
+					  "msi0", "msi1", "msi2", "msi3",
+					  "msi4", "msi5", "msi6", "msi7";
+			#interrupt-cells = <1>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			ranges = <0x81000000 0x0 0x00200000 0x0 0x7ff00000 0x0 0x100000>,
+				 <0x82000000 0x0 0x60000000 0x0 0x60000000 0x0 0x1ff00000>;
+			num-lanes = <8>;
+			num-viewport = <16>;
+			msi-parent = <&its>;
+			msi-map = <0x0 &its 0x20000 0x10000>;
+			status = "disabled";
+		};
+
+		ccn: ccn@9000000 {
+			compatible = "arm,ccn-504";
+			reg = <0x0 0x9000000 0 0x1000000>;
+			interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		ddr0: memory-controller@e200000 {
+			compatible = "baikal,bm1000-edac-mc";
+			reg = <0x0 0x0e200000 0x0 0x10000>;
+			interrupts = <GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>, /* dfi_alert_err */
+				     <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>, /* ecc_corrected_err */
+				     <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, /* ecc_uncorrected_err */
+				     <GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>, /* sbr_done */
+				     <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>, /* ecc_corrected_err_fault */
+				     <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>; /* ecc_uncorrected_err_fault */
+			clocks = <&soc_smc50mhz>;
+			clock-names = "apb_pclk";
+			status = "disabled";
+		};
+
+		ddr1: memory-controller@22200000 {
+			compatible = "baikal,bm1000-edac-mc";
+			reg = <0x0 0x22200000 0x0 0x10000>;
+			interrupts = <GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>, /* dfi_alert_err */
+				     <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>, /* ecc_corrected_err */
+				     <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>, /* ecc_uncorrected_err */
+				     <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>, /* sbr_done */
+				     <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>, /* ecc_corrected_err_fault */
+				     <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>; /* ecc_uncorrected_err_fault */
+			clocks = <&soc_smc50mhz>;
+			clock-names = "apb_pclk";
+			status = "disabled";
+		};
+
+		gpio32: gpio@20200000 {
+			compatible = "snps,dw-apb-gpio";
+			reg = <0x0 0x20200000 0x0 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+
+			porta: gpio-controller@0 {
+				compatible = "snps,dw-apb-gpio-port";
+				gpio-controller;
+				#gpio-cells = <2>;
+				snps,nr-gpios = <32>;
+				reg = <0>;
+				interrupt-controller;
+				#interrupt-cells = <2>;
+				interrupts = <GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>;
+			};
+		};
+
+		spi0: spi@20210000 {
+			compatible = "snps,dw-apb-ssi";
+			reg = <0x0 0x20210000 0x0 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&avlsp_cmu0 4>;
+			clock-names = "ssi_clk";
+			status = "disabled";
+		};
+
+		i2s: i2s@20220000 {
+			compatible = "snps,designware-i2s";
+			reg = <0x0 0x20220000 0x0 0x10000>;
+			interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>, /* rx_da  */
+				  /* <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>,    rx_or  */
+				     <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>, /* tx_emp */
+				     <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>; /* tx_or  */
+			/*
+			 * dmas = <&dmac_lsp 0 1 0>,
+			 *	  <&dmac_lsp 1 0 1>;
+			 * dma-names = "tx", "rx";
+			 * #sound-dai-cells = <0>;
+			 */
+			clocks = <&soc_tmp_clk>;
+			clock-names = "i2sclk";
+			status = "disabled";
+		};
+
+		uart1: serial@20230000 {
+			compatible = "baikal,bm1000-uart", "snps,dw-apb-uart";
+			reg = <0x0 0x20230000 0x0 0x100>;
+			interrupts = <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
+			reg-shift = <2>;
+			reg-io-width = <4>;
+			clocks = <&avlsp_cmu0 1>, <&apb_clk>;
+			clock-names = "baudclk", "apb_pclk";
+			status = "disabled";
+		};
+
+		uart2: serial@20240000 {
+			compatible = "baikal,bm1000-uart", "snps,dw-apb-uart";
+			reg = <0x0 0x20240000 0x0 0x100>;
+			interrupts = <GIC_SPI 102 IRQ_TYPE_LEVEL_HIGH>;
+			reg-shift = <2>;
+			reg-io-width = <4>;
+			clocks = <&avlsp_cmu0 2>, <&apb_clk>;
+			clock-names = "baudclk", "apb_pclk";
+			status = "disabled";
+		};
+
+		i2c1: i2c@20250000 {
+			compatible = "snps,designware-i2c";
+			reg = <0x0 0x20250000 0x0 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>;
+			i2c-sda-hold-time-ns = <500>;
+			clock-frequency = <400000>;
+			clocks = <&i2c_clk>;
+			clock-names = "ref";
+			status = "disabled";
+		};
+
+		i2c2: i2c@20260000 {
+			compatible = "snps,designware-i2c";
+			reg = <0x0 0x20260000 0x0 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupts = <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>;
+			i2c-sda-hold-time-ns = <500>;
+			clock-frequency = <400000>;
+			clocks = <&i2c_clk>;
+			clock-names = "ref";
+			status = "disabled";
+		};
+
+		smbus1: i2c@20270000 {
+			compatible = "baikal,bm1000-smbus";
+			reg = <0x0 0x20270000 0x0 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupts = <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>;
+			clock-frequency = <100000>;
+			clocks = <&smbus_clk>;
+			clock-names = "soc_smbusclk";
+			status = "disabled";
+		};
+
+		smbus2: i2c@20280000 {
+			compatible = "baikal,bm1000-smbus";
+			reg = <0x0 0x20280000 0x0 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupts = <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>;
+			clock-frequency = <100000>;
+			clocks = <&smbus_clk>;
+			clock-names = "soc_smbusclk";
+			status = "disabled";
+		};
+
+		timer1: timer@20290000 {
+			compatible = "snps,dw-apb-timer-osc";
+			reg = <0x0 0x20290000 0x0 0x14>;
+			interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&timer1_clk>;
+			clock-names = "timer";
+			status = "disabled";
+		};
+
+		timer2: timer@20290014 {
+			compatible = "snps,dw-apb-timer-sp";
+			reg = <0x0 0x20290014 0x0 0x14>;
+			interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&timer2_clk>;
+			clock-names = "timer";
+			status = "disabled";
+		};
+
+		timer3: timer@20290028 {
+			compatible = "snps,dw-apb-timer-sp";
+			reg = <0x0 0x20290028 0x0 0x14>;
+			interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&timer3_clk>;
+			clock-names = "timer";
+			status = "disabled";
+		};
+
+		timer4: timer@2029003c {
+			compatible = "snps,dw-apb-timer-sp";
+			reg = <0x0 0x2029003c 0x0 0x14>;
+			interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&timer4_clk>;
+			clock-names = "timer";
+			status = "disabled";
+		};
+
+		spi1: spi@202a0000 {
+			compatible = "baikal,bm1000-espi";
+			reg = <0x0 0x202a0000 0x0 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>;
+			clock-frequency = <5000000>;
+			clocks = <&avlsp_cmu0 5>;
+			clock-names = "soc_espiclk";
+			status = "disabled";
+			/*
+			 * Block Configuration:
+			 * - master/slave
+			 * - 32-bit APB slave
+			 * - tx-fifo = rx-fifo = 256 byte
+			 * - 4 SPI IO channels
+			 * - 8 slave select IO channels
+			 * - DMA - missing
+			 * - M-flash controller - missing
+			 */
+		};
+
+		dmac_lsp: dma-controller@202b0000 {
+			compatible = "snps,dma-spear1340";
+			reg = <0x0 0x202b0000 0x0 0x10000>;
+			interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 43 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 46 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 47 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+			dma-channels = <8>;
+			dma-requests = <16>;
+			dma-masters = <2>;
+			#dma-cells = <3>;
+			chan_allocation_order = <0>;
+			chan_priority = <0>;
+			block_size = <0xff>;
+			data-width = <4 16>;
+			clocks = <&avlsp_cmu0 12>;
+			clock-names = "dmac";
+			snps,dma-protection-control = <0>;
+			status = "disabled";
+		};
+
+		hda: hda@202c0000 {
+			compatible = "baikal,bm1000-hda";
+			reg = <0x0 0x202c0000 0x0 0x1000>;
+			interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&avlsp_cmu0 15>, <&avlsp_cmu0 16>;
+			clock-names = "hda_sys_clk", "hda_clk48";
+			status = "disabled";
+			force-polling-mode;
+			broken-response-irq;
+			increment-codec-address;
+			cyclic-codec-probe;
+		};
+
+		mmc: mmc@202e0000 {
+			compatible = "snps,dwcmshc-sdhci";
+			reg = <0x0 0x202e0000 0x0 0x10000>;
+			interrupts = <GIC_SPI 51 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&avlsp_cmu0 18>, <&avlsp_cmu0 19>;
+			clock-names = "bus", "core";
+			status = "disabled";
+		};
+
+		vdec: vdec@24200000 {
+			compatible = "baikal,d5500-vxd";
+			reg = <0x0 0x24200000 0x0 0x10000>;
+			interrupts = <GIC_SPI 497 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+
+		mali: gpu@2a200000 {
+			compatible = "arm,mali-midgard", "arm,mali-t628";
+			reg = <0x0 0x2a200000 0x0 0x4000>;
+			#cooling-cells = <2>; /* min followed by max */
+			interrupts = <GIC_SPI 175 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 176 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "job", "mmu", "gpu";
+			clocks = <&mali_cmu0>;
+			clock-names = "gpuclk";
+			operating-points-v2 = <&gpu_opp_table>;
+			system-coherency = <0>;
+
+			gpu_opp_table: opp-table {
+				compatible = "operating-points-v2", "operating-points-v2-mali";
+
+				opp-400000000 {
+					opp-hz = /bits/ 64 <400000000>;
+					clock-latency-ns = <10000000>;
+				};
+				opp-450000000 {
+					opp-hz = /bits/ 64 <450000000>;
+					clock-latency-ns = <10000000>;
+				};
+				opp-500000000 {
+					opp-hz = /bits/ 64 <500000000>;
+					clock-latency-ns = <10000000>;
+				};
+				opp-550000000 {
+					opp-hz = /bits/ 64 <550000000>;
+					clock-latency-ns = <10000000>;
+				};
+				opp-600000000 {
+					opp-hz = /bits/ 64 <600000000>;
+					clock-latency-ns = <10000000>;
+				};
+				opp-650000000 {
+					opp-hz = /bits/ 64 <650000000>;
+					clock-latency-ns = <10000000>;
+				};
+				opp-700000000 {
+					opp-hz = /bits/ 64 <700000000>;
+					clock-latency-ns = <10000000>;
+				};
+				opp-750000000 {
+					opp-hz = /bits/ 64 <750000000>;
+					clock-latency-ns = <10000000>;
+				};
+			};
+		};
+
+		usb2: usb@2c400000 {
+			compatible = "snps,dwc3";
+			reg = <0x0 0x2c400000 0x0 0x100000>;
+			interrupts = <GIC_SPI 235 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "dwc_usb3";
+			dr_mode="host";
+			dma-coherent;
+			clocks = <&usb_cmu0 5>, <&usb_cmu0 6>;
+			clock-names = "bus_early", "ref";
+			phys = <&usb_phy 0>;
+			phy-names = "usb2-phy";
+		};
+
+		usb3: usb@2c500000 {
+			compatible = "snps,dwc3";
+			reg = <0x0 0x2c500000 0x0 0x100000>;
+			interrupts = <GIC_SPI 237 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "dwc_usb3";
+			dr_mode="host";
+			dma-coherent;
+			clocks = <&usb_cmu0 11>, <&usb_cmu0 12>, <&usb_cmu0 13>;
+			clock-names = "bus_early", "ref", "suspend";
+			phys = <&usb_phy 1>, <&usb_phy 2>;
+			phy-names = "usb3-phy", "usb2-phy";
+		};
+
+		usb_phy: usb_phy {
+			compatible = "baikal,bm1000-usb-phy";
+			status = "okay";
+			clocks = <&usb_cmu0 3>, <&usb_cmu0 4>, <&usb_cmu0 7>,
+				 <&usb_cmu0 8>, <&usb_cmu0 9>, <&usb_cmu0 10>;
+			#phy-cells = <1>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			usb-phy@0 {
+				reg = <0>;
+				enable;
+			};
+			usb-phy@1 {
+				reg = <1>;
+				enable;
+			};
+			usb-phy@2 {
+				reg = <2>;
+				enable;
+			};
+		};
+
+		sata0: sata@2c600000 {
+			compatible = "baikal,bm1000-ahci", "snps,dwc-ahci", "generic-ahci";
+			reg = <0x0 0x2c600000 0 0x10000>;
+			interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>;
+			ports-implemented = <1>;
+			dma-coherent;
+			clocks = <&soc_faxiclk>;
+			clock-names = "aclk";
+			status = "disabled";
+		};
+
+		sata1: sata@2c610000 {
+			compatible = "baikal,bm1000-ahci", "snps,dwc-ahci", "generic-ahci";
+			reg = <0x0 0x2c610000 0 0x10000>;
+			interrupts = <GIC_SPI 234 IRQ_TYPE_LEVEL_HIGH>;
+			ports-implemented = <1>;
+			dma-coherent;
+			clocks = <&soc_faxiclk>;
+			clock-names = "aclk";
+			status = "disabled";
+		};
+
+		dmac_m2m: dma-controller@2c630000 {
+			compatible = "arm,pl330", "arm,primecell";
+			reg = <0x0 0x2c630000 0 0x10000>;
+			#dma-cells = <1>;
+			#dma-channels = <8>;
+			#dma-requests = <32>;
+			interrupts = <GIC_SPI 223 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 224 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 225 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 226 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 227 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 228 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 229 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 230 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 231 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&soc_faxiclk>;
+			clock-names = "apb_pclk";
+			status = "disabled";
+		};
+
+		gic: interrupt-controller@2d000000 {
+			compatible = "arm,gic-v3";
+			reg = <0x0 0x2d000000 0x0 0x10000>,  /* GICD */
+			      <0x0 0x2d100000 0x0 0x200000>, /* GICR */
+			      <0x0 0x10200000 0x0 0x2000>,   /* GICC */
+			      <0x0 0x10210000 0x0 0x1000>,   /* GICH */
+			      <0x0 0x10220000 0x0 0x2000>;   /* GICV */
+			#interrupt-cells = <3>;
+			#address-cells = <2>;
+			#size-cells = <2>;
+			ranges;
+			interrupt-controller;
+			interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
+
+			its: interrupt-controller@2d020000 {
+				compatible = "arm,gic-v3-its";
+				reg = <0x0 0x2d020000 0x0 0x20000>; /* GITS */
+				msi-controller;
+			};
+		};
+
+		xgmac0: ethernet@30200000 {
+			compatible = "amd,xgbe-seattle-v1a";
+			reg = <0x0 0x30200000 0x0 0x10000>,
+			      <0x0 0x30210000 0x0 0x10000>;
+			interrupts = <GIC_SPI 293 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 294 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 295 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 296 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 297 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 298 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 299 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 300 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 301 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 302 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 303 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 304 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 308 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 309 IRQ_TYPE_LEVEL_HIGH>;
+			fsl,num-rx-queues = <3>;
+			clocks = <&soc_xgbeclk>, <&soc_xgbeclk>, <&soc_xgbeclk>;
+			clock-names = "dma_clk", "ptp_clk", "xgbe_clk";
+			phy-mode = "xgmii";
+			be,pcs-mode = "KX4";
+			status = "disabled";
+			amd,per-channel-interrupt;
+			amd,speed-set = <0>;
+			#stream-id-cells = <16>;
+		};
+
+		xgmac1: ethernet@30220000 {
+			compatible = "amd,xgbe-seattle-v1a";
+			reg = <0x0 0x30220000 0x0 0x10000>,
+			      <0x0 0x30230000 0x0 0x10000>;
+			interrupts = <GIC_SPI 310 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 311 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 312 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 313 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 314 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 315 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 316 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 317 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 318 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 319 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 320 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 321 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 322 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 323 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 324 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 325 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 326 IRQ_TYPE_LEVEL_HIGH>;
+			fsl,num-rx-queues = <3>;
+			clocks = <&soc_xgbeclk>, <&soc_xgbeclk>, <&soc_xgbeclk>;
+			clock-names = "dma_clk", "ptp_clk", "xgbe_clk";
+			phy-mode = "xgmii";
+			be,pcs-mode = "KX4";
+			status = "disabled";
+			amd,per-channel-interrupt;
+			amd,speed-set = <0>;
+			#stream-id-cells = <16>;
+		};
+
+		stmmac_axi_setup: stmmac-axi-config {
+			snps,wr_osr_lmt = <0x0>;
+			snps,rd_osr_lmt = <0x0>;
+			snps,blen = <0 0 0 0 0 0 4>;
+		};
+
+		gmac0: ethernet@30240000 {
+			compatible = "baikal,bm1000-gmac";
+			reg = <0x0 0x30240000 0x0 0x10000>;
+			interrupts = <GIC_SPI 291 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "macirq";
+			max-speed = <1000>;
+			clocks = <&soc_ethclk>, <&xgbe_cmu0 10>;
+			clock-names = "stmmaceth", "tx2_clk";
+			snps,axi-config = <&stmmac_axi_setup>;
+			snps,fixed-burst;
+			snps,no-pbl-x8;
+			snps,txpbl = <4>;
+			snps,rxpbl = <4>;
+			status = "disabled";
+			dma-coherent;
+
+			mdio0: mdio {
+				compatible = "snps,dwmac-mdio";
+				#address-cells = <1>;
+				#size-cells = <0>;
+			};
+		};
+
+		gmac1: ethernet@30250000 {
+			compatible = "baikal,bm1000-gmac";
+			reg = <0x0 0x30250000 0x0 0x10000>;
+			interrupts = <GIC_SPI 292 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "macirq";
+			max-speed = <1000>;
+			clocks = <&soc_ethclk>, <&xgbe_cmu0 13>;
+			clock-names = "stmmaceth", "tx2_clk";
+			snps,axi-config = <&stmmac_axi_setup>;
+			snps,fixed-burst;
+			snps,no-pbl-x8;
+			snps,txpbl = <4>;
+			snps,rxpbl = <4>;
+			status = "disabled";
+			dma-coherent;
+
+			mdio1: mdio {
+				compatible = "snps,dwmac-mdio";
+				#address-cells = <1>;
+				#size-cells = <0>;
+			};
+		};
+
+		vdu: vdu@202d0000 {
+			compatible = "baikal,vdu";
+			reg = <0x0 0x202d0000 0x0 0x1000>,
+			      <0x0 0x30260000 0x0 0x1000>;
+			reg-names = "lvds_regs", "hdmi_regs";
+			interrupts = <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>,
+				<GIC_SPI 329 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "lvds_irq", "hdmi_irq";
+			clocks = <&avlsp_cmu1_div7>, <&xgbe_cmu1 0>;
+			clock-names = "lvds_pclk", "hdmi_pclk";
+			status = "disabled";
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				port@0 {
+					reg = <0>;
+					vdu_hdmi_out: endpoint {
+						remote-endpoint = <&hdmi_tx_in>;
+					};
+				};
+			};
+
+		};
+
+		hdmi: hdmi@30280000 {
+			compatible = "baikal,hdmi";
+			reg = <0x0 0x30280000 0x0 0x20000>;
+			reg-io-width = <4>;
+			interrupts = <GIC_SPI 331 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&xgbe_cmu0 0>, <&xgbe_cmu0 17>;
+			clock-names = "iahb", "isfr";
+			status = "disabled";
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				port@0 {
+					reg = <0>;
+					hdmi_tx_in: endpoint {
+						remote-endpoint = <&vdu_hdmi_out>;
+					};
+				};
+				port@1 {
+					reg = <1>;
+					hdmi_tx_out: endpoint {
+						remote-endpoint = <&hdmi_con>;
+					};
+				};
+			};
+		};
+
+		pvt_cluster0: pvt@28200000 {
+			compatible = "baikal,bm1000-pvt";
+			reg = <0x0 0x28200000 0x0 0x1000>;
+			interrupts = <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+		pvt_cluster1: pvt@c200000 {
+			compatible = "baikal,bm1000-pvt";
+			reg = <0x0 0xc200000 0x0 0x1000>;
+			interrupts = <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+		pvt_cluster2: pvt@a200000 {
+			compatible = "baikal,bm1000-pvt";
+			reg = <0x0 0xa200000 0x0 0x1000>;
+			interrupts = <GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+		pvt_cluster3: pvt@26200000 {
+			compatible = "baikal,bm1000-pvt";
+			reg = <0x0 0x26200000 0x0 0x1000>;
+			interrupts = <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+		pvt_mali: pvt@2a060000 {
+			compatible = "baikal,bm1000-pvt";
+			reg = <0x0 0x2a060000 0x0 0x1000>;
+			interrupts = <GIC_SPI 221 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+	};
+
+	hdmi-out {
+		compatible = "hdmi-connector";
+		label = "HDMI0 OUT";
+		type = "a";
+
+		port {
+			hdmi_con: endpoint {
+				remote-endpoint = <&hdmi_tx_out>;
+			};
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/baikal/bs1000-clocks.dtsi b/arch/arm64/boot/dts/baikal/bs1000-clocks.dtsi
new file mode 100644
index 0000000000000..585c84ae9fa1a
--- /dev/null
+++ b/arch/arm64/boot/dts/baikal/bs1000-clocks.dtsi
@@ -0,0 +1,619 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device tree include file for BE-S1000 SoC clocks
+ * Copyright (C) 2021-2023 Baikal Electronics, JSC
+ */
+
+#define CLK_TYPE_NO	0
+#define CLK_TYPE_REFCLK	1	/* fout = 25 MHz or 100 MHz		*/
+#define CLK_TYPE_PLL	2	/* fout = fref * NF / (NR * OD)		*/
+#define CLK_TYPE_CLKCH	3	/* fout = fref / (VAL_CLKDIV + 1)	*/
+#define CLK_TYPE_CLKEN	4	/* fout = fref / DIV_VAL		*/
+#define CLK_TYPE_CLKDIV	5	/* fout = fref / 2^(DIV_VAL - 1)	*/
+
+#define CA75_CLOCKS(num, base)						\
+	/* pll */							\
+	cluster##num##_cmu0_pll:					\
+	cluster##num##_cmu0_pll {					\
+		compatible = "baikal,bs1000-cmu";			\
+		reg = <(base + 0x0)>;					\
+		type = <CLK_TYPE_PLL>;					\
+		clocks = <&clk_ref>;					\
+		#clock-cells = <0>;					\
+		/*clock-frequency = <1600000000>;*/			\
+		clock-output-names = "cmu0_pll";			\
+	};								\
+	cluster##num##_cmu1_pll:					\
+	cluster##num##_cmu1_pll {					\
+		compatible = "baikal,bs1000-cmu";			\
+		reg = <(base + 0x10000)>;				\
+		type = <CLK_TYPE_PLL>;					\
+		clocks = <&clk_ref>;					\
+		#clock-cells = <0>;					\
+		/*clock-frequency = <2000000000>;*/			\
+		clock-output-names = "cmu1_pll";			\
+	};								\
+	/* clkch */							\
+	cluster##num##_cmu0:						\
+	cluster##num##_cmu0 {						\
+		compatible = "baikal,bs1000-cmu";			\
+		reg = <(base + 0x100)>;					\
+		type = <CLK_TYPE_CLKCH>;				\
+		clocks = <&cluster##num##_cmu0_pll>;			\
+		#clock-cells = <1>;					\
+		clock-output-names =					\
+			"sclk",		/* 0 */				\
+			"periph",	/* 1 */				\
+			"pclk",		/* 2 */				\
+			"at",		/* 3 */				\
+			"gic",		/* 4 */				\
+			"cfg_int",	/* 5 */				\
+			"cfg_out",	/* 6 */				\
+			"bisr",		/* 7 */				\
+			"crp";		/* 8 */				\
+	};								\
+	/* clken */							\
+	cluster##num##_cmu0_en0:					\
+	cluster##num##_cmu0_en0 {					\
+		compatible = "baikal,bs1000-cmu";			\
+		reg = <(base + 0x500)>;					\
+		type = <CLK_TYPE_CLKEN>;				\
+		clocks = <&cluster##num##_cmu0 0>; /* "sclk" */		\
+		#clock-cells = <0>;					\
+		clock-output-names = "clken";				\
+	};								\
+	cluster##num##_cmu0_en1:					\
+	cluster##num##_cmu0_en1 {					\
+		compatible = "baikal,bs1000-cmu";			\
+		reg = <(base + 0x510)>;					\
+		type = <CLK_TYPE_CLKEN>;				\
+		clocks = <&cluster##num##_cmu0 1>; /* "periph" */	\
+		#clock-cells = <1>;					\
+		clock-output-names =					\
+			"cntclken",	/* 0 */				\
+			"tsclken";	/* 1 */				\
+	};								\
+	/* core */							\
+	cluster##num##_cmu1:						\
+	cluster##num##_cmu1 {						\
+		compatible = "baikal,bs1000-cmu";			\
+		reg = <(base + 0x10020)>;				\
+		type = <CLK_TYPE_CLKDIV>;				\
+		clocks = <&cluster##num##_cmu1_pll>;			\
+		#clock-cells = <1>;					\
+		clock-output-names =					\
+			"core0",	/* 0 */				\
+			"core1",	/* 1 */				\
+			"core2",	/* 2 */				\
+			"core3";	/* 3 */				\
+	}
+
+/ {
+	soc {
+		clocks {
+			compatible = "simple-bus";
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			/* TODO: it is fixed-clocks for QEMU, remove it */
+			apb_clk: apb_clk {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <200000000>;
+				clock-output-names = "apb_pclk";
+			};
+			osc25: oscillator25 {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <25000000>;
+				clock-output-names = "osc25";
+			};
+			osc27: oscillator27 {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <27000000>;
+				clock-output-names = "osc27";
+			};
+			uart1_clk: uart1_clk {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <8000000>;
+				clock-output-names = "uart1_clk";
+			};
+			uart2_clk: uart2_clk {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <8000000>;
+				clock-output-names = "uart2_clk";
+			};
+			uart_clk: uart_clk {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <7372800>;
+				clock-output-names = "soc_uartclk";
+			};
+			i2c_clk: i2c_clk {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <200000000>;
+				clock-output-names = "soc_i2cclk";
+			};
+			timer1_clk: timer1_clk {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <50000000>;
+				clock-output-names = "soc_timer1clk";
+			};
+			timer2_clk: timer2_clk {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <50000000>;
+				clock-output-names = "soc_timer2clk";
+			};
+			timer3_clk: timer3_clk {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <50000000>;
+				clock-output-names = "soc_timer3clk";
+			};
+			timer4_clk: timer4_clk {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <50000000>;
+				clock-output-names = "soc_timer4clk";
+			};
+			wdt_clk: wdt_clk {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <1000000>;
+				clock-output-names = "wdtclk";
+			};
+			gpio_clk: gpio_clk {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <1000000>;
+				clock-output-names = "soc_gpioclk";
+			};
+			spi_clk: spi_clk {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <50000000>;
+				clock-output-names = "soc_spiclk";
+			};
+			soc_ethclk: ethclk {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <50000000>;
+				clock-output-names = "eth_clk";
+			};
+			soc_xgbeclk: xgbeclk {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <156250000>;
+				clock-output-names = "xgbe_clk";
+			};
+			soc_smc50mhz: clk50mhz {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <50000000>;
+				clock-output-names = "smc_clk";
+			};
+			soc_faxiclk: refclk400mhz {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <400000000>;
+				clock-output-names = "faxi_clk";
+			};
+			soc_tmp_clk: refclkXXXmhz {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <100000000>;
+				clock-output-names = "tmpclk";
+			};
+			clk_ahb: clk_ahb {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <100000000>;
+				clock-output-names = "clk_ahb";
+			};
+			clk_xin: clk_xin {
+				compatible = "fixed-clock";
+				#clock-cells = <0>;
+				clock-frequency = <100000000>;
+				clock-output-names = "clk_xin";
+			};
+
+			/* CLK_REF */
+			clk_ref: clk_ref {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0xdead>;
+				type = <CLK_TYPE_REFCLK>;
+				#clock-cells = <0>;
+				//clock-frequency = <25000000>; // 25 or 100 MHz
+				clock-output-names = "clk_ref";
+			};
+
+			/* CA75 */
+			CA75_CLOCKS(0, 0x04000000);
+			CA75_CLOCKS(1, 0x08000000);
+			CA75_CLOCKS(2, 0x0c000000);
+			CA75_CLOCKS(3, 0x10000000);
+			CA75_CLOCKS(4, 0x14000000);
+			CA75_CLOCKS(5, 0x18000000);
+			CA75_CLOCKS(6, 0x1c000000);
+			CA75_CLOCKS(7, 0x20000000);
+			CA75_CLOCKS(8, 0x24000000);
+			CA75_CLOCKS(9, 0x28000000);
+			CA75_CLOCKS(10,0x2c000000);
+			CA75_CLOCKS(11,0x30000000);
+
+			/* PCIe */
+			/* pci.pll */
+			cmu_pci_0_pll: cmu_pci_0_pll {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x38000000>;
+				type = <CLK_TYPE_PLL>;
+				clocks = <&clk_ref>;
+				#clock-cells = <0>;
+				/* clock-frequency = <2000000000>; */
+				clock-output-names = "pci_0_pll";
+			};
+			cmu_pci_1_pll: cmu_pci_1_pll {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x3c000000>;
+				type = <CLK_TYPE_PLL>;
+				clocks = <&clk_ref>;
+				#clock-cells = <0>;
+				/* clock-frequency = <2000000000>; */
+				clock-output-names = "pci_1_pll";
+			};
+			cmu_pci_2_pll: cmu_pci_2_pll {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x44000000>;
+				type = <CLK_TYPE_PLL>;
+				clocks = <&clk_ref>;
+				#clock-cells = <0>;
+				/* clock-frequency = <2000000000>; */
+				clock-output-names = "pci_2_pll";
+			};
+			cmu_pci_3_pll: cmu_pci_3_pll {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x48000000>;
+				type = <CLK_TYPE_PLL>;
+				clocks = <&clk_ref>;
+				#clock-cells = <0>;
+				/* clock-frequency = <2000000000>; */
+				clock-output-names = "pci_3_pll";
+			};
+			cmu_pci_4_pll: cmu_pci_4_pll {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x4c000000>;
+				type = <CLK_TYPE_PLL>;
+				clocks = <&clk_ref>;
+				#clock-cells = <0>;
+				/* clock-frequency = <2000000000>; */
+				clock-output-names = "pci_4_pll";
+			};
+
+			/* pci.clkch */
+			cmu_pci_0: cmu_pci_0 {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x38000020>;
+				type = <CLK_TYPE_CLKCH>;
+				clocks = <&cmu_pci_0_pll>;
+				#clock-cells = <1>;
+				clock-output-names =
+					"nic_cfg0m_ssa_x16p0s",		// 0
+					"nic_cfg1m_ssa_x8p1s",		// 1
+					"nic_cfg2m_mmu_tcus",		// 2
+					"nic_cfg3m_ssa_apbs",		// 3
+					"nic_cfg4m_nic_slv_gpvs",	// 4
+					"nic_slv0m_ssa_x16p0s",		// 5
+					"nic_slv1m_ssa_x8p1s",		// 6
+					"ssa_x16p0m_nic_mstr0s",	// 7
+					"ssa_x8p1m_nic_mstr1s",		// 8
+					"nic_mstrm_mmu_tbu0s",		// 9
+					"a4sw_intern",			// 10
+					"phy_ref",			// 11
+					"ssa_aux",			// 12
+					"ahb2axim_axi2ahbs",		// 13
+					"ahb2ahbm_outp_ahbs",		// 14
+					"ssa_x16p0m_cxlas";		// 15
+			};
+			cmu_pci_1: cmu_pci_1 {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x3c000020>;
+				type = <CLK_TYPE_CLKCH>;
+				clocks = <&cmu_pci_1_pll>;
+				#clock-cells = <1>;
+				clock-output-names =
+					"nic_cfg0m_ssa_x16p0s",		// 0
+					"nic_cfg1m_ssa_x8p1s",		// 1
+					"nic_cfg2m_mmu_tcus",		// 2
+					"nic_cfg3m_ssa_apbs",		// 3
+					"nic_cfg4m_nic_slv_gpvs",	// 4
+					"nic_slv0m_ssa_x16p0s",		// 5
+					"nic_slv1m_ssa_x8p1s",		// 6
+					"ssa_x16p0m_nic_mstr0s",	// 7
+					"ssa_x8p1m_nic_mstr1s",		// 8
+					"nic_mstrm_mmu_tbu0s",		// 9
+					"a4sw_intern",			// 10
+					"phy_ref",			// 11
+					"ssa_aux",			// 12
+					"ahb2axim_axi2ahbs",		// 13
+					"ahb2ahbm_outp_ahbs",		// 14
+					"ssa_x16p0m_cxlas";		// 15
+			};
+			cmu_pci_2: cmu_pci_2 {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x44000020>;
+				type = <CLK_TYPE_CLKCH>;
+				clocks = <&cmu_pci_2_pll>;
+				#clock-cells = <1>;
+				clock-output-names =
+					"a", // 0
+					"b"; // 1
+			};
+			cmu_pci_3: cmu_pci_3 {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x48000020>;
+				type = <CLK_TYPE_CLKCH>;
+				clocks = <&cmu_pci_3_pll>;
+				#clock-cells = <1>;
+				clock-output-names =
+					"a", // 0
+					"b"; // 1
+			};
+			cmu_pci_4: cmu_pci_4 {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x4c000020>;
+				type = <CLK_TYPE_CLKCH>;
+				clocks = <&cmu_pci_4_pll>;
+				#clock-cells = <1>;
+				clock-output-names =
+					"a", // 0
+					"b"; // 1
+			};
+
+			/* DDR */
+			/* ddr.pll */
+			cmu_ddr_0_pll: cmu_ddr_0_pll {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x50000000>;
+				type = <CLK_TYPE_PLL>;
+				clocks = <&clk_ref>;
+				#clock-cells = <0>;
+				/* clock-frequency = <2200000000>; */
+				clock-output-names = "ddr_0_pll";
+			};
+			cmu_ddr_1_pll: cmu_ddr_1_pll {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x54000000>;
+				type = <CLK_TYPE_PLL>;
+				clocks = <&clk_ref>;
+				#clock-cells = <0>;
+				/* clock-frequency = <2200000000>; */
+				clock-output-names = "ddr_1_pll";
+			};
+			cmu_ddr_2_pll: cmu_ddr_2_pll {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x58000000>;
+				type = <CLK_TYPE_PLL>;
+				clocks = <&clk_ref>;
+				#clock-cells = <0>;
+				/* clock-frequency = <2200000000>; */
+				clock-output-names = "ddr_3_pll";
+			};
+			cmu_ddr_3_pll: cmu_ddr_3_pll {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x60000000>;
+				type = <CLK_TYPE_PLL>;
+				clocks = <&clk_ref>;
+				#clock-cells = <0>;
+				/* clock-frequency = <2200000000>; */
+				clock-output-names = "ddr_3_pll";
+			};
+			cmu_ddr_4_pll: cmu_ddr_4_pll {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x64000000>;
+				type = <CLK_TYPE_PLL>;
+				clocks = <&clk_ref>;
+				#clock-cells = <0>;
+				/* clock-frequency = <2200000000>; */
+				clock-output-names = "ddr_4_pll";
+			};
+			cmu_ddr_5_pll: cmu_ddr_5_pll {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x68000000>;
+				type = <CLK_TYPE_PLL>;
+				clocks = <&clk_ref>;
+				#clock-cells = <0>;
+				/* clock-frequency = <2200000000>; */
+				clock-output-names = "ddr_5_pll";
+			};
+
+			/* ddr.clkch */
+			cmu_ddr_0: cmu_ddr_0 {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x50000020>;
+				type = <CLK_TYPE_CLKCH>;
+				clocks = <&cmu_ddr_0_pll>;
+				#clock-cells = <1>;
+				clock-output-names =
+					"ddr_ctrl_phy",		// 0
+					"ddr_axi_slv",		// 1
+					"ddr_apb_slv";		// 2
+			};
+			cmu_ddr_1: cmu_ddr_1 {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x54000020>;
+				type = <CLK_TYPE_CLKCH>;
+				clocks = <&cmu_ddr_1_pll>;
+				#clock-cells = <1>;
+				clock-output-names =
+					"ddr_ctrl_phy",		// 0
+					"ddr_axi_slv",		// 1
+					"ddr_apb_slv",		// 2
+					"axi_ext_cfg",		// 3
+					"axi_middle_cfg",	// 4
+					"axi_top_cfg",		// 5
+					"axi_bottom_cfg";	// 6
+			};
+			cmu_ddr_2: cmu_ddr_2 {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x58000020>;
+				type = <CLK_TYPE_CLKCH>;
+				clocks = <&cmu_ddr_2_pll>;
+				#clock-cells = <1>;
+				clock-output-names =
+					"ddr_ctrl_phy",		// 0
+					"ddr_axi_slv",		// 1
+					"ddr_apb_slv";		// 2
+			};
+			cmu_ddr_3: cmu_ddr_3 {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x60000020>;
+				type = <CLK_TYPE_CLKCH>;
+				clocks = <&cmu_ddr_3_pll>;
+				#clock-cells = <1>;
+				clock-output-names =
+					"ddr_ctrl_phy",		// 0
+					"ddr_axi_slv",		// 1
+					"ddr_apb_slv";		// 2
+			};
+			cmu_ddr_4: cmu_ddr_4 {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x64000020>;
+				type = <CLK_TYPE_CLKCH>;
+				clocks = <&cmu_ddr_4_pll>;
+				#clock-cells = <1>;
+				clock-output-names =
+					"ddr_ctrl_phy",		// 0
+					"ddr_axi_slv",		// 1
+					"ddr_apb_slv",		// 2
+					"axi_ext_cfg",		// 3
+					"axi_middle_cfg",	// 4
+					"axi_top_cfg",		// 5
+					"axi_bottom_cfg";	// 6
+			};
+			cmu_ddr_5: cmu_ddr_5 {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x68000020>;
+				type = <CLK_TYPE_CLKCH>;
+				clocks = <&cmu_ddr_5_pll>;
+				#clock-cells = <1>;
+				clock-output-names =
+					"ddr_ctrl_phy",		// 0
+					"ddr_axi_slv",		// 1
+					"ddr_apb_slv";		// 2
+			};
+
+			/* System Control */
+			/* sc.pll */
+			cmu_sc_0_pll: cmu_sc_0_pll {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x00400000>;
+				type = <CLK_TYPE_PLL>;
+				clocks = <&clk_ref>;
+				#clock-cells = <0>;
+				/* clock-frequency = <1200000000>; */
+				clock-output-names = "sc_0_pll";
+			};
+			cmu_sc_1_pll: cmu_sc_1_pll {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x00410000>;
+				type = <CLK_TYPE_PLL>;
+				clocks = <&clk_ref>;
+				#clock-cells = <0>;
+				/* clock-frequency = <1200000000>; */
+				clock-output-names = "sc_1_pll";
+			};
+			cmu_sc_2_pll: cmu_sc_2_pll {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x00420000>;
+				type = <CLK_TYPE_PLL>;
+				clocks = <&clk_ref>;
+				#clock-cells = <0>;
+				/* clock-frequency = <1500000000>; */
+				clock-output-names = "sc_2_pll";
+			};
+
+			/* sc.clkch */
+			cmu_sc_0: cmu_sc_0 {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x00400020>;
+				type = <CLK_TYPE_CLKCH>;
+				clocks = <&cmu_sc_0_pll>;
+				#clock-cells = <1>;
+				clock-output-names =
+					"cmn",			// 0
+					"cmn_cs";		// 1
+			};
+			cmu_sc_1: cmu_sc_1 {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x00410020>;
+				type = <CLK_TYPE_CLKCH>;
+				clocks = <&cmu_sc_1_pll>;
+				#clock-cells = <1>;
+				clock-output-names =
+					"apb_cfg",		// 0
+					"scp_efuse",		// 1
+					"scp_ext_cfg",		// 2
+					"scp_apb_spi",		// 3
+					"scp_uart",		// 4
+					"scp_gpio8_x16",	// 5, additional divisor by 16
+					"scp_spi",		// 6
+					"scp_smbus_i2c0",	// 7
+					"scp_smbus_i2c1",	// 8
+					"gmac1_apb",		// 9
+					"gmac1_axi",		// 10
+					"gmac2_apb",		// 11
+					"gmac2_axi",		// 12
+					"usb_h",		// 13
+					"usb_ohci",		// 14
+					"lsp_apb",		// 15
+					"lsp_gpio_x16",		// 16, additional divisor by 16
+					"lsp_uart",		// 17
+					"lsp_uart_arm1",	// 18
+					"lsp_uart_arm2",	// 19
+					"lsp_spi1",		// 20
+					"lsp_spi2",		// 21
+					"lsp_espi",		// 22
+					"lsp_smbus_i2c2",	// 23
+					"lsp_smbus_i2c3",	// 24
+					"lsp_smbus_i2c4",	// 25
+					"lsp_smbus_i2c5",	// 26
+					"lsp_smbus_i2c6",	// 27
+					"lsp_timer1",		// 28
+					"lsp_timer2",		// 29
+					"lsp_timer3",		// 30
+					"lsp_timer4",		// 31
+					"lsp_wdt_x16",		// 32, additional divisor by 16
+					"tcu",			// 33
+					"sc_axi_cfg",		// 34
+					"ca75_ahb_cfg";		// 35
+			};
+			cmu_sc_2: cmu_sc_2 {
+				compatible = "baikal,bs1000-cmu";
+				reg = <0x00420020>;
+				type = <CLK_TYPE_CLKCH>;
+				clocks = <&cmu_sc_2_pll>;
+				#clock-cells = <1>;
+				clock-output-names =
+					"scp",			// 0
+					"gmac1_ptp",		// 1
+					"gmac1_txx2",		// 2
+					"gmac2_ptp",		// 3
+					"gmac2_txx2",		// 4
+					"gic_distr",		// 5
+					"dbg",			// 6
+					"cnt_valueb",		// 7
+					"ts_valueb",		// 8
+					"lpd",			// 9
+					"tbu",			// 10
+					"hs_cfg",		// 11
+					"scp_bisr";		// 12
+			};
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/baikal/bs1000-dbs-mezzanine-espi.dtsi b/arch/arm64/boot/dts/baikal/bs1000-dbs-mezzanine-espi.dtsi
new file mode 100644
index 0000000000000..0ef4c1c7d92f2
--- /dev/null
+++ b/arch/arm64/boot/dts/baikal/bs1000-dbs-mezzanine-espi.dtsi
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device tree include file for DBS socket X6.16 mezzanine board
+ * Copyright (C) 2023 Baikal Electronics, JSC
+ */
+
+&espi {
+	num-cs = <4>;
+	status = "okay";
+
+	flash@0 {
+		compatible = "rohm,dh2228fv";
+		reg = <0>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		spi-max-frequency = <10000000>;
+		spi-tx-bus-width = <1>;
+		spi-rx-bus-width = <1>;
+		status = "okay";
+	};
+
+	flash@1 {
+		compatible = "rohm,dh2228fv";
+		reg = <1>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		spi-max-frequency = <10000000>;
+		spi-tx-bus-width = <1>;
+		spi-rx-bus-width = <1>;
+		status = "okay";
+	};
+
+	flash@2 {
+		compatible = "rohm,dh2228fv";
+		reg = <2>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		spi-max-frequency = <10000000>;
+		spi-tx-bus-width = <1>;
+		spi-rx-bus-width = <1>;
+		status = "okay";
+	};
+
+	flash@3 {
+		compatible = "rohm,dh2228fv";
+		reg = <3>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		spi-max-frequency = <10000000>;
+		spi-tx-bus-width = <1>;
+		spi-rx-bus-width = <1>;
+		status = "okay";
+	};
+};
diff --git a/arch/arm64/boot/dts/baikal/bs1000-dbs-mezzanine-qspi2.dtsi b/arch/arm64/boot/dts/baikal/bs1000-dbs-mezzanine-qspi2.dtsi
new file mode 100644
index 0000000000000..929210b0692cf
--- /dev/null
+++ b/arch/arm64/boot/dts/baikal/bs1000-dbs-mezzanine-qspi2.dtsi
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device tree include file for DBS socket X11.16 mezzanine board
+ * Copyright (C) 2023 Baikal Electronics, JSC
+ */
+
+&qspi2 {
+	num-cs = <2>;
+	status = "okay";
+
+	flash@0 {
+		compatible = "rohm,dh2228fv";
+		reg = <0>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		spi-max-frequency = <10000000>;
+		spi-tx-bus-width = <1>;
+		spi-rx-bus-width = <1>;
+		status = "okay";
+	};
+
+	flash@1 {
+		compatible = "rohm,dh2228fv";
+		reg = <1>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		spi-max-frequency = <10000000>;
+		spi-tx-bus-width = <1>;
+		spi-rx-bus-width = <1>;
+		status = "okay";
+	};
+};
diff --git a/arch/arm64/boot/dts/baikal/bs1000-dbs-ov.dts b/arch/arm64/boot/dts/baikal/bs1000-dbs-ov.dts
new file mode 100644
index 0000000000000..86113d20e32a7
--- /dev/null
+++ b/arch/arm64/boot/dts/baikal/bs1000-dbs-ov.dts
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device tree source for DBS-OV compatible boards:
+ *   - DBS-OV vA0
+ *
+ * Copyright (C) 2022-2023 Baikal Electronics, JSC
+ */
+
+/dts-v1/;
+
+#include "bs1000-dbs.dtsi"
+
+/ {
+	model = "Baikal Electronics DBS-OV";
+	compatible = "baikal,dbs-ov", "baikal,bs1000";
+};
+
+&pcie0_p0 {
+	num-lanes = <16>;
+	status = "okay";
+};
+
+&pcie0_p0_ep {
+	num-lanes = <16>;
+	status = "okay";
+};
+
+/delete-node/ &pcie0_p0_ep;
+
+&pcie1_p0 {
+	num-lanes = <16>;
+	status = "okay";
+};
+
+&pcie1_p0_ep {
+	num-lanes = <16>;
+	status = "okay";
+};
+
+/delete-node/ &pcie1_p0_ep;
+
+&pcie2_p0 {
+	num-lanes = <16>;
+	status = "okay";
+};
+
+&pcie2_p0_ep {
+	num-lanes = <16>;
+	status = "okay";
+};
+
+/delete-node/ &pcie2_p0_ep;
+
+&pcie3_p0 {
+	num-lanes = <8>;
+	status = "okay";
+};
+
+&pcie3_p2 {
+	num-lanes = <4>;
+	status = "okay";
+};
+
+&pcie3_p3 {
+	num-lanes = <4>;
+	status = "okay";
+};
+
+&pcie4_p0 {
+	num-lanes = <8>;
+	status = "okay";
+};
+
+&pcie4_p2 {
+	num-lanes = <4>;
+	status = "okay";
+};
+
+&pcie4_p3 {
+	num-lanes = <4>;
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/baikal/bs1000-dbs.dts b/arch/arm64/boot/dts/baikal/bs1000-dbs.dts
new file mode 100644
index 0000000000000..b260ac03c2c7c
--- /dev/null
+++ b/arch/arm64/boot/dts/baikal/bs1000-dbs.dts
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device tree source for DBS compatible boards:
+ *   - DBS vA0
+ *
+ * Copyright (C) 2021-2023 Baikal Electronics, JSC
+ */
+
+/dts-v1/;
+
+#include "bs1000-dbs.dtsi"
+
+/ {
+	model = "Baikal Electronics DBS";
+	compatible = "baikal,dbs", "baikal,bs1000";
+};
+
+&pcie0_p0 {
+	num-lanes = <8>;
+	status = "okay";
+};
+
+&pcie0_p0_ep {
+	num-lanes = <8>;
+	status = "okay";
+};
+
+/delete-node/ &pcie0_p0_ep;
+
+&pcie0_p1 {
+	num-lanes = <8>;
+	status = "okay";
+};
+
+&pcie1_p0 {
+	num-lanes = <8>;
+	status = "okay";
+};
+
+&pcie1_p0_ep {
+	num-lanes = <8>;
+	status = "okay";
+};
+
+/delete-node/ &pcie1_p0_ep;
+
+&pcie1_p1 {
+	num-lanes = <8>;
+	status = "okay";
+};
+
+&pcie2_p0 {
+	num-lanes = <8>;
+	status = "okay";
+};
+
+&pcie2_p0_ep {
+	num-lanes = <8>;
+	status = "okay";
+};
+
+/delete-node/ &pcie2_p0_ep;
+
+&pcie2_p1 {
+	num-lanes = <8>;
+	status = "okay";
+};
+
+&pcie3_p0 {
+	num-lanes = <4>;
+	status = "okay";
+};
+
+&pcie3_p1 {
+	num-lanes = <4>;
+	status = "okay";
+};
+
+&pcie3_p2 {
+	num-lanes = <4>;
+	status = "okay";
+};
+
+&pcie3_p3 {
+	num-lanes = <4>;
+	status = "okay";
+};
+
+&pcie4_p0 {
+	num-lanes = <4>;
+	status = "okay";
+};
+
+&pcie4_p1 {
+	num-lanes = <4>;
+	status = "okay";
+};
+
+&pcie4_p2 {
+	num-lanes = <4>;
+	status = "okay";
+};
+
+&pcie4_p3 {
+	num-lanes = <4>;
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/baikal/bs1000-dbs.dtsi b/arch/arm64/boot/dts/baikal/bs1000-dbs.dtsi
new file mode 100644
index 0000000000000..25b7001bd5383
--- /dev/null
+++ b/arch/arm64/boot/dts/baikal/bs1000-dbs.dtsi
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device tree include file for DBS-compatible boards
+ * Copyright (C) 2021-2023 Baikal Electronics, JSC
+ */
+
+#include "bs1000.dtsi"
+
+/ {
+	aliases {
+		ethernet0 = &gmac0;
+		ethernet1 = &gmac1;
+	};
+
+	chosen { };
+};
+
+&ehci {
+	status = "okay";
+};
+
+&gmac0 {
+	status = "okay";
+	phy-handle = <&ethphy0>;
+	phy-mode = "rgmii-rxid";
+};
+
+&gmac1 {
+	status = "okay";
+	phy-handle = <&ethphy1>;
+	phy-mode = "rgmii-rxid";
+};
+
+&gpio32 {
+	status = "okay";
+};
+
+&i2c2 {
+	status = "okay";
+};
+
+&i2c3 {
+	status = "okay";
+};
+
+&i2c4 {
+	status = "okay";
+};
+
+&mdio0 {
+	ethphy0: ethernet-phy@1 {
+		compatible = "ethernet-phy-ieee802.3-c22";
+		reg = <0x1>;
+	};
+};
+
+&mdio1 {
+	ethphy1: ethernet-phy@1 {
+		compatible = "ethernet-phy-ieee802.3-c22";
+		reg = <0x1>;
+	};
+};
+
+&mux {
+	status = "okay";
+};
+
+&mux0 {
+	status = "okay";
+};
+
+&mux1 {
+	status = "okay";
+};
+
+&mux2 {
+	status = "okay";
+};
+
+&ohci {
+	status = "okay";
+};
+
+&pvt_cluster0 {
+	status = "okay";
+};
+
+&pvt_cluster1 {
+	status = "okay";
+};
+
+&pvt_cluster2 {
+	status = "okay";
+};
+
+&pvt_cluster3 {
+	status = "okay";
+};
+
+&pvt_cluster4 {
+	status = "okay";
+};
+
+&pvt_cluster5 {
+	status = "okay";
+};
+
+&pvt_cluster6 {
+	status = "okay";
+};
+
+&pvt_cluster7 {
+	status = "okay";
+};
+
+&pvt_cluster8 {
+	status = "okay";
+};
+
+&pvt_cluster9 {
+	status = "okay";
+};
+
+&pvt_cluster10 {
+	status = "okay";
+};
+
+&pvt_cluster11 {
+	status = "okay";
+};
+
+&pvt_ddr0 {
+	status = "okay";
+};
+
+&pvt_ddr1 {
+	status = "okay";
+};
+
+&pvt_ddr2 {
+	status = "okay";
+};
+
+&pvt_ddr3 {
+	status = "okay";
+};
+
+&pvt_ddr4 {
+	status = "okay";
+};
+
+&pvt_ddr5 {
+	status = "okay";
+};
+
+&pvt_pcie0 {
+	status = "okay";
+};
+
+&pvt_pcie1 {
+	status = "okay";
+};
+
+&pvt_pcie2 {
+	status = "okay";
+};
+
+&pvt_pcie3 {
+	status = "okay";
+};
+
+&pvt_pcie4 {
+	status = "okay";
+};
+
+&qspi1 {
+	num-cs = <4>;
+	status = "okay";
+
+	flash@0 {
+		compatible = "jedec,spi-nor";
+		reg = <0>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		spi-max-frequency = <10000000>;
+		spi-tx-bus-width = <1>;
+		spi-rx-bus-width = <1>;
+		status = "okay";
+
+		partitions {
+			compatible = "fixed-partitions";
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			partition@0 {
+				label = "bl1";
+				reg = <0x000000 0x40000>;
+			};
+
+			partition@40000 {
+				label = "dtb";
+				reg = <0x040000 0x40000>;
+			};
+
+			partition@80000 {
+				label = "uefi-vars";
+				reg = <0x080000 0xc0000>;
+			};
+
+			partition@140000 {
+				label = "fip";
+				reg = <0x140000 0x6c0000>;
+			};
+		};
+	};
+};
+
+&timer1 {
+	status = "okay";
+};
+
+&timer2 {
+	status = "okay";
+};
+
+&timer3 {
+	status = "okay";
+};
+
+&timer4 {
+	status = "okay";
+};
+
+&uart_a1 {
+	status = "okay";
+};
+
+&uart_a2 {
+	status = "okay";
+};
+
+&uart_s {
+	status = "okay";
+};
+
+&wdt {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/baikal/bs1000-qemu-s.dts b/arch/arm64/boot/dts/baikal/bs1000-qemu-s.dts
new file mode 100644
index 0000000000000..982618a7249c9
--- /dev/null
+++ b/arch/arm64/boot/dts/baikal/bs1000-qemu-s.dts
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device tree source for Baikal Electronics QEMU-S virtual platform
+ * Copyright (C) 2021-2023 Baikal Electronics, JSC
+ */
+
+/dts-v1/;
+
+#include "bs1000.dtsi"
+
+/ {
+	model = "Baikal Electronics QEMU-S";
+	compatible = "baikal,qemu-s", "baikal,bs1000";
+
+	aliases {
+		ethernet0 = &gmac0;
+		ethernet1 = &gmac1;
+	};
+
+	chosen { };
+
+	/*
+	 * Device is necessary for UEFI to boot on QEMU,
+	 * need to replace it with something later.
+	 */
+	flash@0 {
+		compatible = "cfi-flash";
+		reg = <0x0 0x4000000 0x0 0x4000000>;
+		bank-width = <0x4>;
+	};
+};
+
+&ehci {
+	status = "okay";
+};
+
+&gmac0 {
+	status = "okay";
+	phy-handle = <&ethphy0>;
+	phy-mode = "rgmii-rxid";
+};
+
+&gmac1 {
+	status = "okay";
+	phy-handle = <&ethphy1>;
+	phy-mode = "rgmii-rxid";
+};
+
+&mdio0 {
+	ethphy0: ethernet-phy@3 {
+		compatible = "ethernet-phy-ieee802.3-c22";
+		reg = <0x3>;
+	};
+};
+
+&mdio1 {
+	ethphy1: ethernet-phy@3 {
+		compatible = "ethernet-phy-ieee802.3-c22";
+		reg = <0x3>;
+	};
+};
+
+&ohci {
+	status = "okay";
+};
+
+&qspi1 {
+	num-cs = <4>;
+	status = "okay";
+
+	flash@0 {
+		compatible = "jedec,spi-nor";
+		reg = <0>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		spi-max-frequency = <10000000>;
+		spi-tx-bus-width = <1>;
+		spi-rx-bus-width = <1>;
+		status = "okay";
+
+		partitions {
+			compatible = "fixed-partitions";
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			partition@0 {
+				label = "bl1";
+				reg = <0x000000 0x40000>;
+			};
+
+			partition@40000 {
+				label = "dtb";
+				reg = <0x040000 0x40000>;
+			};
+
+			partition@80000 {
+				label = "uefi-vars";
+				reg = <0x080000 0xc0000>;
+			};
+
+			partition@140000 {
+				label = "fip";
+				reg = <0x140000 0x6c0000>;
+			};
+		};
+	};
+};
+
+&timer1 {
+	status = "okay";
+};
+
+&uart_a1 {
+	status = "okay";
+};
+
+&uart_a2 {
+	status = "okay";
+};
+
+&uart_s {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/baikal/bs1000.dtsi b/arch/arm64/boot/dts/baikal/bs1000.dtsi
new file mode 100644
index 0000000000000..76fd2ef420139
--- /dev/null
+++ b/arch/arm64/boot/dts/baikal/bs1000.dtsi
@@ -0,0 +1,1190 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device tree include file for BE-S1000 SoC
+ * Copyright (C) 2021-2023 Baikal Electronics, JSC
+ */
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include "bs1000-clocks.dtsi"
+
+#define CA75_CLUSTER(group, a,b,c,d)			\
+	cluster##group:					\
+	cluster##group {				\
+		core0 {					\
+			cpu = <&cpu##a>;		\
+		};					\
+		core1 {					\
+			cpu = <&cpu##b>;		\
+		};					\
+		core2 {					\
+			cpu = <&cpu##c>;		\
+		};					\
+		core3 {					\
+			cpu = <&cpu##d>;		\
+		};					\
+		l3_##group: l3-cache##group {		\
+			compatible = "cache";		\
+			cache-size = <0x200000>;	\
+			cache-line-size = <64>;		\
+			cache-sets = <2048>;		\
+			cache-unified;			\
+			cache-level = <3>;		\
+			next-level-cache = <&l4>;	\
+		};					\
+	}
+
+#define CA75_CPU(core, group, affinity)			\
+	cpu##core: cpu@##affinity {			\
+		device_type = "cpu";			\
+		compatible = "arm,cortex-a75";		\
+		reg = <0x0 0x##affinity>;		\
+		enable-method = "psci";			\
+		i-cache-size = <0x10000>;		\
+		i-cache-line-size = <64>;		\
+		i-cache-sets = <256>;			\
+		d-cache-size = <0x10000>;		\
+		d-cache-line-size = <64>;		\
+		d-cache-sets = <64>;			\
+		next-level-cache = <&l2_##core>;	\
+		operating-points-v2 = <&cpu_opp_table>;		\
+		clocks = <&cluster##group##_cmu1 (core%4)>;	\
+		l2_##core: l2-cache {				\
+			compatible = "cache";			\
+			cache-size = <0x80000>;			\
+			cache-line-size = <64>;			\
+			cache-sets = <1024>;			\
+			cache-unified;				\
+			cache-level = <2>;			\
+			next-level-cache = <&l3_##group>;	\
+		};						\
+	}
+
+/ {
+	compatible = "baikal,bs1000";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&gic>;
+
+	aliases {
+		i2c2	= &i2c2;
+		i2c3	= &i2c3;
+		i2c4	= &i2c4;
+		i2c5	= &i2c5;
+		i2c6	= &i2c6;
+		serial0	= &uart_a1;
+		serial1	= &uart_a2;
+		serial2	= &uart_s;
+	};
+
+	psci {
+		compatible = "arm,psci-1.0", "arm,psci-0.2";
+		method = "smc";
+	};
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu-map {
+			CA75_CLUSTER(0,  0, 1, 2, 3 );
+			CA75_CLUSTER(1,  4, 5, 6, 7 );
+			CA75_CLUSTER(2,  8, 9, 10,11);
+			CA75_CLUSTER(3,  12,13,14,15);
+			CA75_CLUSTER(4,  16,17,18,19);
+			CA75_CLUSTER(5,  20,21,22,23);
+			CA75_CLUSTER(6,  24,25,26,27);
+			CA75_CLUSTER(7,  28,29,30,31);
+			CA75_CLUSTER(8,  32,33,34,35);
+			CA75_CLUSTER(9,  36,37,38,39);
+			CA75_CLUSTER(10, 40,41,42,43);
+			CA75_CLUSTER(11, 44,45,46,47);
+		};
+
+		CA75_CPU(0,0,0);
+		CA75_CPU(1,0,100);
+		CA75_CPU(2,0,200);
+		CA75_CPU(3,0,300);
+		CA75_CPU(4,1,10000);
+		CA75_CPU(5,1,10100);
+		CA75_CPU(6,1,10200);
+		CA75_CPU(7,1,10300);
+		CA75_CPU(8,2,20000);
+		CA75_CPU(9,2,20100);
+		CA75_CPU(10,2,20200);
+		CA75_CPU(11,2,20300);
+		CA75_CPU(12,3,30000);
+		CA75_CPU(13,3,30100);
+		CA75_CPU(14,3,30200);
+		CA75_CPU(15,3,30300);
+		CA75_CPU(16,4,40000);
+		CA75_CPU(17,4,40100);
+		CA75_CPU(18,4,40200);
+		CA75_CPU(19,4,40300);
+		CA75_CPU(20,5,50000);
+		CA75_CPU(21,5,50100);
+		CA75_CPU(22,5,50200);
+		CA75_CPU(23,5,50300);
+		CA75_CPU(24,6,60000);
+		CA75_CPU(25,6,60100);
+		CA75_CPU(26,6,60200);
+		CA75_CPU(27,6,60300);
+		CA75_CPU(28,7,70000);
+		CA75_CPU(29,7,70100);
+		CA75_CPU(30,7,70200);
+		CA75_CPU(31,7,70300);
+		CA75_CPU(32,8,80000);
+		CA75_CPU(33,8,80100);
+		CA75_CPU(34,8,80200);
+		CA75_CPU(35,8,80300);
+		CA75_CPU(36,9,90000);
+		CA75_CPU(37,9,90100);
+		CA75_CPU(38,9,90200);
+		CA75_CPU(39,9,90300);
+		CA75_CPU(40,10,a0000);
+		CA75_CPU(41,10,a0100);
+		CA75_CPU(42,10,a0200);
+		CA75_CPU(43,10,a0300);
+		CA75_CPU(44,11,b0000);
+		CA75_CPU(45,11,b0100);
+		CA75_CPU(46,11,b0200);
+		CA75_CPU(47,11,b0300);
+
+		l4: l4-cache {
+			cache-size = <0x2000000>;
+			cache-unified;
+			cache-level = <4>;
+		};
+	};
+
+	cpu_opp_table: opp_table {
+		compatible = "operating-points-v2";
+		opp-250000000 {
+			opp-hz = /bits/ 64 <250000000>;
+		};
+		opp-500000000 {
+			opp-hz = /bits/ 64 <500000000>;
+		};
+		opp-1000000000 {
+			opp-hz = /bits/ 64 <1000000000>;
+		};
+		opp-2000000000 {
+			opp-hz = /bits/ 64 <2000000000>;
+		};
+	};
+
+	pmu {
+		compatible = "arm,cortex-a75-pmu", "arm,armv8-pmuv3";
+		interrupts = <GIC_PPI 7 IRQ_TYPE_LEVEL_LOW>;
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_PPI 14 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_PPI 11 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_PPI 10 IRQ_TYPE_LEVEL_LOW>;
+	};
+
+	soc {
+		compatible = "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		ohci: usb@a00000 {
+			compatible = "generic-ohci";
+			reg = <0x0 0xa00000 0x0 0x1000>;
+			interrupts = <GIC_SPI 78 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+
+		ehci: usb@a10000 {
+			compatible = "generic-ehci";
+			reg = <0x0 0xa10000 0x0 0x1000>;
+			interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+
+		gmac0: ethernet@a20000 {
+			compatible = "baikal,bs1000-gmac";
+			reg = <0x0 0xa20000 0x0 0x10000>;
+			interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "macirq";
+			clocks = <&cmu_sc_1 9>, <&cmu_sc_2 1>, <&cmu_sc_2 2>;
+			clock-names = "stmmaceth", "ptp_ref", "tx2_clk";
+			dma-coherent;
+			status = "disabled";
+
+			mdio0: mdio {
+				compatible = "snps,dwmac-mdio";
+				#address-cells = <1>;
+				#size-cells = <0>;
+			};
+		};
+
+		gmac1: ethernet@a30000 {
+			compatible = "baikal,bs1000-gmac";
+			reg = <0x0 0xa30000 0x0 0x10000>;
+			interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "macirq";
+			clocks = <&cmu_sc_1 11>, <&cmu_sc_2 3>, <&cmu_sc_2 4>;
+			clock-names = "stmmaceth", "ptp_ref", "tx2_clk";
+			dma-coherent;
+			status = "disabled";
+
+			mdio1: mdio {
+				compatible = "snps,dwmac-mdio";
+				#address-cells = <1>;
+				#size-cells = <0>;
+			};
+		};
+
+		uart_a1: serial@c00000 {
+			compatible = "arm,pl011", "arm,primecell";
+			reg = <0x0 0xc00000 0x0 0x1000>;
+			interrupts = <GIC_SPI 61 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&uart1_clk>, <&apb_clk>;
+			// clocks = <&cmu_sc_1 18>, <&cmu_sc_1 15>; // "lsp_uart_arm1", "lsp_apb"
+			clock-names = "uartclk", "apb_pclk";
+			status = "disabled";
+		};
+
+		uart_a2: serial@c10000 {
+			compatible = "arm,pl011", "arm,primecell";
+			reg = <0x0 0xc10000 0x0 0x1000>;
+			interrupts = <GIC_SPI 62 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&uart2_clk>, <&apb_clk>;
+			// clocks = <&cmu_sc_1 19>, <&cmu_sc_1 15>; // "lsp_uart_arm2", "lsp_apb"
+			clock-names = "uartclk", "apb_pclk";
+			status = "disabled";
+		};
+
+		qspi1: spi@c20000 {
+			compatible = "snps,dw-apb-ssi";
+			reg = <0x0 0xc20000 0x0 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupts = <GIC_SPI 63 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&spi_clk>;
+			// clocks = <&cmu_sc_1 20>; // "lsp_spi1"
+			status = "disabled";
+		};
+
+		gpio32: gpio@c50000 {
+			compatible = "snps,dw-apb-gpio";
+			reg = <0x0 0xc50000 0x0 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			clocks = <&gpio_clk>;
+			// clocks = <&cmu_sc_1 16>; // "lsp_gpio_x16"
+			status = "disabled";
+
+			porta: gpio-controller@0 {
+				compatible = "snps,dw-apb-gpio-port";
+				gpio-controller;
+				#gpio-cells = <2>;
+				snps,nr-gpios = <32>;
+				reg = <0>;
+				interrupt-controller;
+				#interrupt-cells = <2>;
+				interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>;
+			};
+		};
+
+		i2c2: i2c@c90000 {
+			compatible = "snps,designware-i2c";
+			reg = <0x0 0xc90000 0x0 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>;
+			i2c-sda-hold-time-ns = <500>;
+			clock-frequency = <400000>;
+			clocks = <&i2c_clk>;
+			// clocks = <&cmu_sc_1 23>; // "lsp_smbus_i2c2"
+			clock-names = "soc_i2cclk";
+			status = "disabled";
+		};
+
+		i2c3: i2c@ca0000 {
+			compatible = "snps,designware-i2c";
+			reg = <0x0 0xca0000 0x0 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupts = <GIC_SPI 66 IRQ_TYPE_LEVEL_HIGH>;
+			i2c-sda-hold-time-ns = <500>;
+			clock-frequency = <400000>;
+			clocks = <&i2c_clk>;
+			// clocks = <&cmu_sc_1 24>; // "lsp_smbus_i2c3"
+			clock-names = "soc_i2cclk";
+			status = "disabled";
+		};
+
+		i2c4: i2c@cb0000 {
+			compatible = "snps,designware-i2c";
+			reg = <0x0 0xcb0000 0x0 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupts = <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>;
+			i2c-sda-hold-time-ns = <500>;
+			clock-frequency = <400000>;
+			clocks = <&i2c_clk>;
+			// clocks = <&cmu_sc_1 25>; // "lsp_smbus_i2c4"
+			clock-names = "soc_i2cclk";
+			status = "disabled";
+		};
+
+		wdt: watchdog@ce0000 {
+			compatible = "snps,dw-wdt";
+			reg = <0x0 0xce0000 0x0 0x1000>;
+			interrupts = <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&wdt_clk 0>;
+			// clocks = <&cmu_sc_1 32>; // "lsp_wdt_x16"
+			clock-names = "wdtclk";
+			snps,watchdog-tops = <0x000000ff 0x000001ff 0x000003ff 0x000007ff
+					      0x0000ffff 0x0001ffff 0x0003ffff 0x0007ffff
+					      0x000fffff 0x001fffff 0x003fffff 0x007fffff
+					      0x00ffffff 0x01ffffff 0x03ffffff 0x07ffffff>;
+			status = "disabled";
+		};
+
+		timer1: timer@cf0000 {
+			compatible = "snps,dw-apb-timer-osc";
+			reg = <0x0 0xcf0000 0x0 0x14>;
+			interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
+			clock-frequency = <50000000>;
+			clocks = <&timer1_clk>;
+			// clocks = <&cmu_sc_1 28>; // "lsp_timer1"
+			clock-names = "soc_timer1clk";
+			status = "disabled";
+		};
+
+		timer2: timer@cf0014 {
+			compatible = "snps,dw-apb-timer-sp";
+			reg = <0x0 0xcf0014 0x0 0x14>;
+			interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>;
+			clock-frequency = <50000000>;
+			clocks = <&timer2_clk>;
+			// clocks = <&cmu_sc_1 29>; // "lsp_timer2"
+			clock-names = "soc_timer2clk";
+			status = "disabled";
+		};
+
+		timer3: timer@cf0028 {
+			compatible = "snps,dw-apb-timer-sp";
+			reg = <0x0 0xcf0028 0x0 0x14>;
+			interrupts = <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>;
+			clock-frequency = <50000000>;
+			clocks = <&timer3_clk>;
+			// clocks = <&cmu_sc_1 30>; // "lsp_timer3"
+			clock-names = "soc_timer3clk";
+			status = "disabled";
+		};
+
+		timer4: timer@cf003c {
+			compatible = "snps,dw-apb-timer-sp";
+			reg = <0x0 0xcf003c 0x0 0x14>;
+			interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
+			clock-frequency = <50000000>;
+			clocks = <&timer4_clk>;
+			//clocks = <&cmu_sc_1 31>; // "lsp_timer4"
+			clock-names = "soc_timer4clk";
+			status = "disabled";
+		};
+
+		gic: interrupt-controller@1000000 {
+			compatible = "arm,gic-v3";
+			reg = <0x0 0x1000000 0x0 0x10000>,  /* GICD */
+			      <0x0 0x1240000 0x0 0x600000>; /* GICR */
+			      /* TODO: optional ranges - GICC, GICH, GICV */
+			#interrupt-cells = <3>;
+			#address-cells = <2>;
+			#size-cells = <2>;
+			ranges;
+			interrupt-controller;
+			interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
+
+			its0: interrupt-controller@1040000 {
+				compatible = "arm,gic-v3-its";
+				reg = <0x0 0x1040000 0x0 0x20000>;
+				msi-controller;
+			};
+			its1: interrupt-controller@1060000 {
+				compatible = "arm,gic-v3-its";
+				reg = <0x0 0x1060000 0x0 0x20000>;
+				msi-controller;
+			};
+			its2: interrupt-controller@1080000 {
+				compatible = "arm,gic-v3-its";
+				reg = <0x0 0x1080000 0x0 0x20000>;
+				msi-controller;
+			};
+			its3: interrupt-controller@10a0000 {
+				compatible = "arm,gic-v3-its";
+				reg = <0x0 0x10a0000 0x0 0x20000>;
+				msi-controller;
+			};
+			its4: interrupt-controller@10c0000 {
+				compatible = "arm,gic-v3-its";
+				reg = <0x0 0x10c0000 0x0 0x20000>;
+				msi-controller;
+			};
+			its5: interrupt-controller@10e0000 {
+				compatible = "arm,gic-v3-its";
+				reg = <0x0 0x10e0000 0x0 0x20000>;
+				msi-controller;
+			};
+			its6: interrupt-controller@1100000 {
+				compatible = "arm,gic-v3-its";
+				reg = <0x0 0x1100000 0x0 0x20000>;
+				msi-controller;
+			};
+			its7: interrupt-controller@1120000 {
+				compatible = "arm,gic-v3-its";
+				reg = <0x0 0x1120000 0x0 0x20000>;
+				msi-controller;
+			};
+			its8: interrupt-controller@1140000 {
+				compatible = "arm,gic-v3-its";
+				reg = <0x0 0x1140000 0x0 0x20000>;
+				msi-controller;
+			};
+			its9: interrupt-controller@1160000 {
+				compatible = "arm,gic-v3-its";
+				reg = <0x0 0x1160000 0x0 0x20000>;
+				msi-controller;
+			};
+			its10: interrupt-controller@1180000 {
+				compatible = "arm,gic-v3-its";
+				reg = <0x0 0x1180000 0x0 0x20000>;
+				msi-controller;
+			};
+			its11: interrupt-controller@11a0000 {
+				compatible = "arm,gic-v3-its";
+				reg = <0x0 0x11a0000 0x0 0x20000>;
+				msi-controller;
+			};
+			its12: interrupt-controller@11c0000 {
+				compatible = "arm,gic-v3-its";
+				reg = <0x0 0x11c0000 0x0 0x20000>;
+				msi-controller;
+			};
+			its13: interrupt-controller@11e0000 {
+				compatible = "arm,gic-v3-its";
+				reg = <0x0 0x11e0000 0x0 0x20000>;
+				msi-controller;
+			};
+			its14: interrupt-controller@1200000 {
+				compatible = "arm,gic-v3-its";
+				reg = <0x0 0x1200000 0x0 0x20000>;
+				msi-controller;
+			};
+			its15: interrupt-controller@1220000 {
+				compatible = "arm,gic-v3-its";
+				reg = <0x0 0x1220000 0x0 0x20000>;
+				msi-controller;
+			};
+		};
+
+		pcie0_p0: pcie@39000000 {
+			device_type = "pci";
+			compatible = "baikal,bs1000-pcie";
+			reg = <0x00000000 0x39000000 0x0 0x400000>,
+			      <0x00000000 0x38d40000 0x0 0x100>,
+			      <0x00000700 0x00000000 0x0 0x10000000>;
+			reg-names = "dbi", "apb", "config";
+			bus-range = <0x0 0xff>;
+			interrupts = <GIC_SPI 145 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "intr", "msi";
+			#interrupt-cells = <1>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			ranges = <0x81000000 0x0 0x00000000 0x701 0x00000000 0x0 0x100000>,
+				 <0x82000000 0x0 0x40000000 0x702 0x00000000 0x0 0x40000000>;
+			num-viewport = <4>;
+			msi-parent = <&its0>;
+			msi-map = <0x0 &its0 0x0 0x10000>;
+			status = "disabled";
+		};
+
+		pcie0_p0_ep: pcie-ep@39000000 {
+			compatible = "baikal,bs1000-pcie-ep";
+			reg = <0x00000000 0x39000000 0x0 0x400000>,
+			      <0x00000000 0x38d40000 0x0 0x100>,
+			      <0x00000700 0x00000000 0x1 0x00000000>;
+			reg-names = "dbi", "apb", "addr_space";
+			num-ib-windows = <4>;
+			num-ob-windows = <4>;
+			status = "disabled";
+		};
+
+		pcie0_p1: pcie@39400000 {
+			device_type = "pci";
+			compatible = "baikal,bs1000-pcie";
+			reg = <0x00000000 0x39400000 0x0 0x400000>,
+			      <0x00000000 0x38d41000 0x0 0x100>,
+			      <0x00000740 0x00000000 0x0 0x10000000>;
+			reg-names = "dbi", "apb", "config";
+			bus-range = <0x0 0xff>;
+			interrupts = <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "intr", "msi";
+			#interrupt-cells = <1>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			ranges = <0x81000000 0x0 0x00100000 0x741 0x00000000 0x0 0x100000>,
+				 <0x82000000 0x0 0x40000000 0x742 0x00000000 0x0 0x40000000>;
+			num-viewport = <4>;
+			msi-parent = <&its0>;
+			msi-map = <0x0 &its0 0x10000 0x10000>;
+			status = "disabled";
+		};
+
+		pcie1_p0: pcie@3d000000 {
+			device_type = "pci";
+			compatible = "baikal,bs1000-pcie";
+			reg = <0x00000000 0x3d000000 0x0 0x400000>,
+			      <0x00000000 0x3cd40000 0x0 0x100>,
+			      <0x00000780 0x00000000 0x0 0x10000000>;
+			reg-names = "dbi", "apb", "config";
+			bus-range = <0x0 0xff>;
+			interrupts = <GIC_SPI 188 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 190 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "intr", "msi";
+			#interrupt-cells = <1>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			ranges = <0x81000000 0x0 0x00200000 0x781 0x00000000 0x0 0x100000>,
+				 <0x82000000 0x0 0x40000000 0x782 0x00000000 0x0 0x40000000>;
+			num-viewport = <4>;
+			msi-parent = <&its2>;
+			msi-map = <0x0 &its2 0x0 0x10000>;
+			status = "disabled";
+		};
+
+		pcie1_p0_ep: pcie-ep@3d000000 {
+			compatible = "baikal,bs1000-pcie-ep";
+			reg = <0x00000000 0x3d000000 0x0 0x400000>,
+			      <0x00000000 0x3cd40000 0x0 0x100>,
+			      <0x00000780 0x00000000 0x1 0x00000000>;
+			reg-names = "dbi", "apb", "addr_space";
+			num-ib-windows = <4>;
+			num-ob-windows = <4>;
+			status = "disabled";
+		};
+
+		pcie1_p1: pcie@3d400000 {
+			device_type = "pci";
+			compatible = "baikal,bs1000-pcie";
+			reg = <0x00000000 0x3d400000 0x0 0x400000>,
+			      <0x00000000 0x3cd41000 0x0 0x100>,
+			      <0x000007c0 0x00000000 0x0 0x10000000>;
+			reg-names = "dbi", "apb", "config";
+			bus-range = <0x0 0xff>;
+			interrupts = <GIC_SPI 189 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 191 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "intr", "msi";
+			#interrupt-cells = <1>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			ranges = <0x81000000 0x0 0x00300000 0x7c1 0x00000000 0x0 0x100000>,
+				 <0x82000000 0x0 0x40000000 0x7c2 0x00000000 0x0 0x40000000>;
+			num-viewport = <4>;
+			msi-parent = <&its2>;
+			msi-map = <0x0 &its2 0x10000 0x10000>;
+			status = "disabled";
+		};
+
+		pcie2_p0: pcie@45000000 {
+			device_type = "pci";
+			compatible = "baikal,bs1000-pcie";
+			reg = <0x00000000 0x45000000 0x0 0x400000>,
+			      <0x00000000 0x44d40000 0x0 0x100>,
+			      <0x00000500 0x00000000 0x0 0x10000000>;
+			reg-names = "dbi", "apb", "config";
+			bus-range = <0x0 0xff>;
+			interrupts = <GIC_SPI 102 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "intr", "msi";
+			#interrupt-cells = <1>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			ranges = <0x81000000 0x0 0x00400000 0x501 0x00000000 0x0 0x100000>,
+				 <0x82000000 0x0 0x40000000 0x502 0x00000000 0x0 0x40000000>;
+			num-viewport = <4>;
+			msi-parent = <&its4>;
+			msi-map = <0x0 &its4 0x0 0x10000>;
+			status = "disabled";
+		};
+
+		pcie2_p0_ep: pcie-ep@45000000 {
+			compatible = "baikal,bs1000-pcie-ep";
+			reg = <0x00000000 0x45000000 0x0 0x400000>,
+			      <0x00000000 0x44d40000 0x0 0x100>,
+			      <0x00000500 0x00000000 0x1 0x00000000>;
+			reg-names = "dbi", "apb", "addr_space";
+			num-ib-windows = <4>;
+			num-ob-windows = <4>;
+			status = "disabled";
+		};
+
+		pcie2_p1: pcie@45400000 {
+			device_type = "pci";
+			compatible = "baikal,bs1000-pcie";
+			reg = <0x00000000 0x45400000 0x0 0x400000>,
+			      <0x00000000 0x44d41000 0x0 0x100>,
+			      <0x00000540 0x00000000 0x0 0x10000000>;
+			reg-names = "dbi", "apb", "config";
+			bus-range = <0x0 0xff>;
+			interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "intr", "msi";
+			#interrupt-cells = <1>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			ranges = <0x81000000 0x0 0x00500000 0x541 0x00000000 0x0 0x100000>,
+				 <0x82000000 0x0 0x40000000 0x542 0x00000000 0x0 0x40000000>;
+			num-viewport = <4>;
+			msi-parent = <&its4>;
+			msi-map = <0x0 &its4 0x10000 0x10000>;
+			status = "disabled";
+		};
+
+		pcie3_p0: pcie@49000000 {
+			device_type = "pci";
+			compatible = "baikal,bs1000-pcie";
+			reg = <0x00000000 0x49000000 0x0 0x400000>,
+			      <0x00000000 0x48d40000 0x0 0x100>,
+			      <0x00000400 0x00000000 0x0 0x10000000>;
+			reg-names = "dbi", "apb", "config";
+			bus-range = <0x0 0xff>;
+			interrupts = <GIC_SPI 249 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 253 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "intr", "msi";
+			#interrupt-cells = <1>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			ranges = <0x81000000 0x0 0x00600000 0x401 0x00000000 0x0 0x100000>,
+				 <0x82000000 0x0 0x40000000 0x402 0x00000000 0x0 0x40000000>;
+			num-viewport = <4>;
+			msi-parent = <&its6>;
+			msi-map = <0x0 &its6 0x0 0x10000>;
+			status = "disabled";
+		};
+
+		pcie3_p1: pcie@49400000 {
+			device_type = "pci";
+			compatible = "baikal,bs1000-pcie";
+			reg = <0x00000000 0x49400000 0x0 0x400000>,
+			      <0x00000000 0x48d41000 0x0 0x100>,
+			      <0x00000440 0x00000000 0x0 0x10000000>;
+			reg-names = "dbi", "apb", "config";
+			bus-range = <0x0 0xff>;
+			interrupts = <GIC_SPI 250 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 254 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "intr", "msi";
+			#interrupt-cells = <1>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			ranges = <0x81000000 0x0 0x00700000 0x441 0x00000000 0x0 0x100000>,
+				 <0x82000000 0x0 0x40000000 0x442 0x00000000 0x0 0x40000000>;
+			num-viewport = <4>;
+			msi-parent = <&its7>;
+			msi-map = <0x0 &its7 0x10000 0x10000>;
+			status = "disabled";
+		};
+
+		pcie3_p2: pcie@49800000 {
+			device_type = "pci";
+			compatible = "baikal,bs1000-pcie";
+			reg = <0x00000000 0x49800000 0x0 0x400000>,
+			      <0x00000000 0x48d42000 0x0 0x100>,
+			      <0x00000480 0x00000000 0x0 0x10000000>;
+			reg-names = "dbi", "apb", "config";
+			bus-range = <0x0 0xff>;
+			interrupts = <GIC_SPI 251 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 255 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "intr", "msi";
+			#interrupt-cells = <1>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			ranges = <0x81000000 0x0 0x00800000 0x481 0x00000000 0x0 0x100000>,
+				 <0x82000000 0x0 0x40000000 0x482 0x00000000 0x0 0x40000000>;
+			num-viewport = <4>;
+			msi-parent = <&its8>;
+			msi-map = <0x0 &its8 0x20000 0x10000>;
+			status = "disabled";
+		};
+
+		pcie3_p3: pcie@49c00000 {
+			device_type = "pci";
+			compatible = "baikal,bs1000-pcie";
+			reg = <0x00000000 0x49c00000 0x0 0x400000>,
+			      <0x00000000 0x48d43000 0x0 0x100>,
+			      <0x000004c0 0x00000000 0x0 0x10000000>;
+			reg-names = "dbi", "apb", "config";
+			bus-range = <0x0 0xff>;
+			interrupts = <GIC_SPI 252 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "intr", "msi";
+			#interrupt-cells = <1>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			ranges = <0x81000000 0x0 0x00900000 0x4c1 0x00000000 0x0 0x100000>,
+				 <0x82000000 0x0 0x40000000 0x4c2 0x00000000 0x0 0x40000000>;
+			num-viewport = <4>;
+			msi-parent = <&its9>;
+			msi-map = <0x0 &its9 0x30000 0x10000>;
+			status = "disabled";
+		};
+
+		pcie4_p0: pcie@4d000000 {
+			device_type = "pci";
+			compatible = "baikal,bs1000-pcie";
+			reg = <0x00000000 0x4d000000 0x0 0x400000>,
+			      <0x00000000 0x4cd40000 0x0 0x100>,
+			      <0x00000600 0x00000000 0x0 0x10000000>;
+			reg-names = "dbi", "apb", "config";
+			bus-range = <0x0 0xff>;
+			interrupts = <GIC_SPI 318 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 322 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "intr", "msi";
+			#interrupt-cells = <1>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			ranges = <0x81000000 0x0 0x00a00000 0x601 0x00000000 0x0 0x100000>,
+				 <0x82000000 0x0 0x40000000 0x602 0x00000000 0x0 0x40000000>;
+			num-viewport = <4>;
+			msi-parent = <&its11>;
+			msi-map = <0x0 &its11 0x0 0x10000>;
+			status = "disabled";
+		};
+
+		pcie4_p1: pcie@4d400000 {
+			device_type = "pci";
+			compatible = "baikal,bs1000-pcie";
+			reg = <0x00000000 0x4d400000 0x0 0x400000>,
+			      <0x00000000 0x4cd41000 0x0 0x100>,
+			      <0x00000640 0x00000000 0x0 0x10000000>;
+			reg-names = "dbi", "apb", "config";
+			bus-range = <0x0 0xff>;
+			interrupts = <GIC_SPI 319 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 323 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "intr", "msi";
+			#interrupt-cells = <1>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			ranges = <0x81000000 0x0 0x00b00000 0x641 0x00000000 0x0 0x100000>,
+				 <0x82000000 0x0 0x40000000 0x642 0x00000000 0x0 0x40000000>;
+			num-viewport = <4>;
+			msi-parent = <&its12>;
+			msi-map = <0x0 &its12 0x10000 0x10000>;
+			status = "disabled";
+		};
+
+		pcie4_p2: pcie@4d800000 {
+			device_type = "pci";
+			compatible = "baikal,bs1000-pcie";
+			reg = <0x00000000 0x4d800000 0x0 0x400000>,
+			      <0x00000000 0x4cd42000 0x0 0x100>,
+			      <0x00000680 0x00000000 0x0 0x10000000>;
+			reg-names = "dbi", "apb", "config";
+			bus-range = <0x0 0xff>;
+			interrupts = <GIC_SPI 320 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 324 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "intr", "msi";
+			#interrupt-cells = <1>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			ranges = <0x81000000 0x0 0x00c00000 0x681 0x00000000 0x0 0x100000>,
+				 <0x82000000 0x0 0x40000000 0x682 0x00000000 0x0 0x40000000>;
+			num-viewport = <4>;
+			msi-parent = <&its13>;
+			msi-map = <0x0 &its13 0x20000 0x10000>;
+			status = "disabled";
+		};
+
+		pcie4_p3: pcie@4dc00000 {
+			device_type = "pci";
+			compatible = "baikal,bs1000-pcie";
+			reg = <0x00000000 0x4dc00000 0x0 0x400000>,
+			      <0x00000000 0x4cd43000 0x0 0x100>,
+			      <0x000006c0 0x00000000 0x0 0x10000000>;
+			reg-names = "dbi", "apb", "config";
+			bus-range = <0x0 0xff>;
+			interrupts = <GIC_SPI 321 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 325 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "intr", "msi";
+			#interrupt-cells = <1>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			ranges = <0x81000000 0x0 0x00d00000 0x6c1 0x00000000 0x0 0x100000>,
+				 <0x82000000 0x0 0x40000000 0x6c2 0x00000000 0x0 0x40000000>;
+			num-viewport = <4>;
+			msi-parent = <&its14>;
+			msi-map = <0x0 &its14 0x30000 0x10000>;
+			status = "disabled";
+		};
+
+		ddr0: memory-controller@53000000 {
+			compatible = "baikal,bs1000-edac-mc";
+			reg = <0x0 0x53000000 0x0 0x10000>;
+			interrupts = <GIC_SPI 358 IRQ_TYPE_LEVEL_HIGH>, /* dfi_alert_err */
+				     <GIC_SPI 356 IRQ_TYPE_LEVEL_HIGH>, /* ecc_corrected_err */
+				     <GIC_SPI 355 IRQ_TYPE_LEVEL_HIGH>, /* ecc_uncorrected_err */
+				     <GIC_SPI 352 IRQ_TYPE_LEVEL_HIGH>, /* sbr_done */
+				     <GIC_SPI 354 IRQ_TYPE_LEVEL_HIGH>, /* ecc_corrected_err_fault */
+				     <GIC_SPI 353 IRQ_TYPE_LEVEL_HIGH>; /* ecc_uncorrected_err_fault */
+			// clocks = <&cmu_ddr_0 0>;
+			// clock-names = "apb_pclk";
+			status = "disabled";
+		};
+
+		ddr1: memory-controller@57000000 {
+			compatible = "baikal,bs1000-edac-mc";
+			reg = <0x0 0x57000000 0x0 0x10000>;
+			interrupts = <GIC_SPI 375 IRQ_TYPE_LEVEL_HIGH>, /* dfi_alert_err */
+				     <GIC_SPI 373 IRQ_TYPE_LEVEL_HIGH>, /* ecc_corrected_err */
+				     <GIC_SPI 372 IRQ_TYPE_LEVEL_HIGH>, /* ecc_uncorrected_err */
+				     <GIC_SPI 369 IRQ_TYPE_LEVEL_HIGH>, /* sbr_done */
+				     <GIC_SPI 371 IRQ_TYPE_LEVEL_HIGH>, /* ecc_corrected_err_fault */
+				     <GIC_SPI 370 IRQ_TYPE_LEVEL_HIGH>; /* ecc_uncorrected_err_fault */
+			// clocks = <&cmu_ddr_1 0>;
+			// clock-names = "apb_pclk";
+			status = "disabled";
+		};
+
+		ddr2: memory-controller@5b000000 {
+			compatible = "baikal,bs1000-edac-mc";
+			reg = <0x0 0x5b000000 0x0 0x10000>;
+			interrupts = <GIC_SPI 392 IRQ_TYPE_LEVEL_HIGH>, /* dfi_alert_err */
+				     <GIC_SPI 390 IRQ_TYPE_LEVEL_HIGH>, /* ecc_corrected_err */
+				     <GIC_SPI 389 IRQ_TYPE_LEVEL_HIGH>, /* ecc_uncorrected_err */
+				     <GIC_SPI 386 IRQ_TYPE_LEVEL_HIGH>, /* sbr_done */
+				     <GIC_SPI 388 IRQ_TYPE_LEVEL_HIGH>, /* ecc_corrected_err_fault */
+				     <GIC_SPI 387 IRQ_TYPE_LEVEL_HIGH>; /* ecc_uncorrected_err_fault */
+			// clocks = <&cmu_ddr_2 0>;
+			// clock-names = "apb_pclk";
+			status = "disabled";
+		};
+
+		ddr3: memory-controller@63000000 {
+			compatible = "baikal,bs1000-edac-mc";
+			reg = <0x0 0x63000000 0x0 0x10000>;
+			interrupts = <GIC_SPI 409 IRQ_TYPE_LEVEL_HIGH>, /* dfi_alert_err */
+				     <GIC_SPI 407 IRQ_TYPE_LEVEL_HIGH>, /* ecc_corrected_err */
+				     <GIC_SPI 406 IRQ_TYPE_LEVEL_HIGH>, /* ecc_uncorrected_err */
+				     <GIC_SPI 403 IRQ_TYPE_LEVEL_HIGH>, /* sbr_done */
+				     <GIC_SPI 405 IRQ_TYPE_LEVEL_HIGH>, /* ecc_corrected_err_fault */
+				     <GIC_SPI 404 IRQ_TYPE_LEVEL_HIGH>; /* ecc_uncorrected_err_fault */
+			// clocks = <&cmu_ddr_3 0>;
+			// clock-names = "apb_pclk";
+			status = "disabled";
+		};
+
+		ddr4: memory-controller@67000000 {
+			compatible = "baikal,bs1000-edac-mc";
+			reg = <0x0 0x67000000 0x0 0x10000>;
+			interrupts = <GIC_SPI 426 IRQ_TYPE_LEVEL_HIGH>, /* dfi_alert_err */
+				     <GIC_SPI 424 IRQ_TYPE_LEVEL_HIGH>, /* ecc_corrected_err */
+				     <GIC_SPI 423 IRQ_TYPE_LEVEL_HIGH>, /* ecc_uncorrected_err */
+				     <GIC_SPI 420 IRQ_TYPE_LEVEL_HIGH>, /* sbr_done */
+				     <GIC_SPI 422 IRQ_TYPE_LEVEL_HIGH>, /* ecc_corrected_err_fault */
+				     <GIC_SPI 421 IRQ_TYPE_LEVEL_HIGH>; /* ecc_uncorrected_err_fault */
+			// clocks = <&cmu_ddr_4 0>;
+			// clock-names = "apb_pclk";
+			status = "disabled";
+		};
+
+		ddr5: memory-controller@6b000000 {
+			compatible = "baikal,bs1000-edac-mc";
+			reg = <0x0 0x6b000000 0x0 0x10000>;
+			interrupts = <GIC_SPI 443 IRQ_TYPE_LEVEL_HIGH>, /* dfi_alert_err */
+				     <GIC_SPI 441 IRQ_TYPE_LEVEL_HIGH>, /* ecc_corrected_err */
+				     <GIC_SPI 440 IRQ_TYPE_LEVEL_HIGH>, /* ecc_uncorrected_err */
+				     <GIC_SPI 437 IRQ_TYPE_LEVEL_HIGH>, /* sbr_done */
+				     <GIC_SPI 439 IRQ_TYPE_LEVEL_HIGH>, /* ecc_corrected_err_fault */
+				     <GIC_SPI 438 IRQ_TYPE_LEVEL_HIGH>; /* ecc_uncorrected_err_fault */
+			// clocks = <&cmu_ddr_5 0>;
+			// clock-names = "apb_pclk";
+			status = "disabled";
+		};
+
+		pvt_cluster0: pvt@4030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x4030000 0x0 0x1000>;
+			status = "disabled";
+		};
+		pvt_cluster1: pvt@8030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x8030000 0x0 0x1000>;
+			status = "disabled";
+		};
+		pvt_cluster2: pvt@c030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0xc030000 0x0 0x1000>;
+			status = "disabled";
+		};
+		pvt_cluster3: pvt@10030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x10030000 0x0 0x1000>;
+			status = "disabled";
+		};
+		pvt_cluster4: pvt@14030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x14030000 0x0 0x1000>;
+			status = "disabled";
+		};
+		pvt_cluster5: pvt@18030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x18030000 0x0 0x1000>;
+			status = "disabled";
+		};
+		pvt_cluster6: pvt@1c030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x1c030000 0x0 0x1000>;
+			status = "disabled";
+		};
+		pvt_cluster7: pvt@20030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x20030000 0x0 0x1000>;
+			status = "disabled";
+		};
+		pvt_cluster8: pvt@24030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x24030000 0x0 0x1000>;
+			status = "disabled";
+		};
+		pvt_cluster9: pvt@28030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x28030000 0x0 0x1000>;
+			status = "disabled";
+		};
+		pvt_cluster10: pvt@2c030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x2c030000 0x0 0x1000>;
+			status = "disabled";
+		};
+		pvt_cluster11: pvt@30030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x30030000 0x0 0x1000>;
+			status = "disabled";
+		};
+		pvt_pcie0: pvt@38030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x38030000 0x0 0x1000>;
+			interrupts = <GIC_SPI 163 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+		pvt_pcie1: pvt@3c030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x3c030000 0x0 0x1000>;
+			interrupts = <GIC_SPI 206 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+		pvt_pcie2: pvt@44030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x44030000 0x0 0x1000>;
+			interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+		pvt_pcie3: pvt@48030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x48030000 0x0 0x1000>;
+			interrupts = <GIC_SPI 275 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+		pvt_pcie4: pvt@4c030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x4c030000 0x0 0x1000>;
+			interrupts = <GIC_SPI 344 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+		pvt_ddr0: pvt@50030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x50030000 0x0 0x1000>;
+			interrupts = <GIC_SPI 363 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+		pvt_ddr1: pvt@54030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x54030000 0x0 0x1000>;
+			interrupts = <GIC_SPI 380 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+		pvt_ddr2: pvt@58030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x58030000 0x0 0x1000>;
+			interrupts = <GIC_SPI 397 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+		pvt_ddr3: pvt@60030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x60030000 0x0 0x1000>;
+			interrupts = <GIC_SPI 414 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+		pvt_ddr4: pvt@64030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x64030000 0x0 0x1000>;
+			interrupts = <GIC_SPI 431 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+		pvt_ddr5: pvt@68030000 {
+			compatible = "baikal,bs1000-pvt";
+			reg = <0x0 0x68030000 0x0 0x1000>;
+			interrupts = <GIC_SPI 448 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+
+		mux: mux-controller {
+			compatible = "baikal,bs1000-lsp-mux";
+			#address-cells = <2>;
+			#size-cells = <2>;
+			ranges;
+			#mux-control-cells = <1>;
+			status = "disabled";
+
+			qspi2: spi@c30000 {
+				compatible = "snps,dw-apb-ssi";
+				reg = <0x0 0xc30000 0x0 0x1000>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				interrupts = <GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&spi_clk>;
+				// clocks = <&cmu_sc_1 21>; // "lsp_spi2"
+			};
+
+			espi: spi@c40000 {
+				compatible = "baikal,bs1000-espi";
+				reg = <0x0 0xc40000 0x0 0x1000>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				interrupts = <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&spi_clk>;
+				// clocks = <&cmu_sc_1 22>; // "lsp_espi"
+			};
+
+			gpio16: gpio@c60000 {
+				compatible = "snps,dw-apb-gpio";
+				reg = <0x0 0xc60000 0x0 0x1000>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				clocks = <&gpio_clk>;
+				// clocks = <&cmu_sc_1 16>; // "lsp_gpio_x16"
+
+				portb: gpio-controller@0 {
+					compatible = "snps,dw-apb-gpio-port";
+					gpio-controller;
+					#gpio-cells = <2>;
+					snps,nr-gpios = <16>;
+					reg = <0>;
+					interrupt-controller;
+					#interrupt-cells = <2>;
+					interrupts = <GIC_SPI 51 IRQ_TYPE_LEVEL_HIGH>;
+				};
+			};
+
+			gpio8_1: gpio@c70000 {
+				compatible = "snps,dw-apb-gpio";
+				reg = <0x0 0xc70000 0x0 0x1000>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				clocks = <&gpio_clk>;
+				// clocks = <&cmu_sc_1 16>; // "lsp_gpio_x16"
+
+				portc: gpio-controller@0 {
+					compatible = "snps,dw-apb-gpio-port";
+					gpio-controller;
+					#gpio-cells = <2>;
+					snps,nr-gpios = <8>;
+					reg = <0>;
+					interrupt-controller;
+					#interrupt-cells = <2>;
+					interrupts = <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>;
+				};
+			};
+
+			gpio8_2: gpio@c80000 {
+				compatible = "snps,dw-apb-gpio";
+				reg = <0x0 0xc80000 0x0 0x1000>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				clocks = <&gpio_clk>;
+				// clocks = <&cmu_sc_1 16>; // "lsp_gpio_x16"
+
+				portd: gpio-controller@0 {
+					compatible = "snps,dw-apb-gpio-port";
+					gpio-controller;
+					#gpio-cells = <2>;
+					snps,nr-gpios = <8>;
+					reg = <0>;
+					interrupt-controller;
+					#interrupt-cells = <2>;
+					interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
+				};
+			};
+
+			i2c5: i2c@cc0000 {
+				compatible = "snps,designware-i2c";
+				reg = <0x0 0xcc0000 0x0 0x1000>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				interrupts = <GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>;
+				i2c-sda-hold-time-ns = <500>;
+				clock-frequency = <400000>;
+				clocks = <&i2c_clk>;
+				// clocks = <&cmu_sc_1 26>; // "lsp_smbus_i2c5"
+				clock-names = "soc_i2cclk";
+			};
+
+			i2c6: i2c@cd0000 {
+				compatible = "snps,designware-i2c";
+				reg = <0x0 0xcd0000 0x0 0x1000>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				interrupts = <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
+				i2c-sda-hold-time-ns = <500>;
+				clock-frequency = <400000>;
+				clocks = <&i2c_clk>;
+				// clocks = <&cmu_sc_1 27>; // "lsp_smbus_i2c6"
+				clock-names = "soc_i2cclk";
+			};
+
+			uart_s: serial@e00000 {
+				compatible = "snps,dw-apb-uart";
+				reg = <0x0 0xe00000 0x0 0x100>;
+				interrupts = <GIC_SPI 60 IRQ_TYPE_LEVEL_HIGH>;
+				reg-shift = <2>;
+				reg-io-width = <4>;
+				clocks = <&uart_clk>, <&apb_clk>;
+				// clocks = <&cmu_sc_1 17>, <&cmu_sc_1 15>; // "lsp_uart", "lsp_apb"
+				clock-names = "uartclk", "apb_pclk";
+			};
+		};
+
+		mux0: mux0 {
+			compatible = "baikal,bs1000-lsp-mux-channel";
+			mux-controls = <&mux 0>;
+			mux-state0 = <&gpio8_1>;
+			mux-state1 = <&uart_s &i2c5 &i2c6>;
+			status = "disabled";
+		};
+
+		mux1: mux1 {
+			compatible = "baikal,bs1000-lsp-mux-channel";
+			mux-controls = <&mux 1>;
+			mux-state0 = <&gpio8_2>;
+			mux-state1 = <&qspi2>;
+			status = "disabled";
+		};
+
+		mux2: mux2 {
+			compatible = "baikal,bs1000-lsp-mux-channel";
+			mux-controls = <&mux 2>;
+			mux-state0 = <&gpio16>;
+			mux-state1 = <&espi>;
+			status = "disabled";
+		};
+	};
+};
diff --git a/arch/arm64/configs/baikal_defconfig b/arch/arm64/configs/baikal_defconfig
new file mode 100644
index 0000000000000..2655128e7e28f
--- /dev/null
+++ b/arch/arm64/configs/baikal_defconfig
@@ -0,0 +1,923 @@
+CONFIG_LOCALVERSION="-baikal-arm64"
+CONFIG_DEFAULT_HOSTNAME="baikal"
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_BPF=y
+CONFIG_USER_NS=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_SLAB=y
+CONFIG_PROFILING=y
+CONFIG_ARCH_BAIKAL=y
+# CONFIG_ARM64_ERRATUM_826319 is not set
+# CONFIG_ARM64_ERRATUM_827319 is not set
+# CONFIG_ARM64_ERRATUM_824069 is not set
+# CONFIG_ARM64_ERRATUM_819472 is not set
+# CONFIG_ARM64_ERRATUM_832075 is not set
+# CONFIG_ARM64_ERRATUM_834220 is not set
+# CONFIG_ARM64_ERRATUM_845719 is not set
+# CONFIG_ARM64_ERRATUM_843419 is not set
+# CONFIG_ARM64_ERRATUM_1024718 is not set
+# CONFIG_ARM64_ERRATUM_1418040 is not set
+# CONFIG_ARM64_ERRATUM_1165522 is not set
+# CONFIG_ARM64_ERRATUM_1286807 is not set
+# CONFIG_ARM64_ERRATUM_1463225 is not set
+# CONFIG_ARM64_ERRATUM_1542419 is not set
+# CONFIG_CAVIUM_ERRATUM_22375 is not set
+# CONFIG_CAVIUM_ERRATUM_23154 is not set
+# CONFIG_CAVIUM_ERRATUM_27456 is not set
+# CONFIG_CAVIUM_ERRATUM_30115 is not set
+# CONFIG_CAVIUM_TX2_ERRATUM_219 is not set
+# CONFIG_QCOM_FALKOR_ERRATUM_1003 is not set
+# CONFIG_QCOM_FALKOR_ERRATUM_1009 is not set
+# CONFIG_QCOM_QDF2400_ERRATUM_0065 is not set
+# CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set
+# CONFIG_HISILICON_ERRATUM_161600802 is not set
+# CONFIG_QCOM_FALKOR_ERRATUM_E1041 is not set
+# CONFIG_FUJITSU_ERRATUM_010001 is not set
+CONFIG_ARM64_VA_BITS_48=y
+CONFIG_SCHED_MC=y
+CONFIG_NR_CPUS=48
+CONFIG_HOTPLUG_CPU=y
+CONFIG_HZ_1000=y
+CONFIG_SECCOMP=y
+CONFIG_KEXEC_FILE=y
+CONFIG_COMPAT=y
+CONFIG_ARMV8_DEPRECATED=y
+CONFIG_SWP_EMULATION=y
+CONFIG_CP15_BARRIER_EMULATION=y
+CONFIG_SETEND_EMULATION=y
+# CONFIG_SUSPEND is not set
+CONFIG_PM=y
+CONFIG_CPU_IDLE_GOV_LADDER=y
+CONFIG_ARM_CPUIDLE=y
+CONFIG_ARM_PSCI_CPUIDLE=y
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
+CONFIG_CPUFREQ_DT=y
+CONFIG_EFI_VARS=y
+CONFIG_EFI_BOOTLOADER_CONTROL=y
+CONFIG_ACPI=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM=y
+CONFIG_VHOST_NET=m
+CONFIG_VHOST_SCSI=m
+CONFIG_VHOST_VSOCK=m
+CONFIG_VHOST_CROSS_ENDIAN_LEGACY=y
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_STATIC_KEYS_SELFTEST=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_CGROUP_IOLATENCY=y
+CONFIG_BLK_CGROUP_IOCOST=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IOSCHED_BFQ=y
+CONFIG_BFQ_GROUP_IOSCHED=y
+CONFIG_BINFMT_MISC=m
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y
+CONFIG_FRONTSWAP=y
+CONFIG_CMA=y
+CONFIG_ZSWAP=y
+CONFIG_ZBUD=y
+CONFIG_Z3FOLD=m
+CONFIG_ZSMALLOC=m
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_XFRM_USER=y
+CONFIG_XFRM_SUB_POLICY=y
+CONFIG_NET_KEY=y
+CONFIG_NET_KEY_MIGRATE=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_FIB_TRIE_STATS=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_NET_IPVTI=m
+CONFIG_NET_FOU=m
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_DIAG=m
+CONFIG_INET_UDP_DIAG=m
+CONFIG_INET_DIAG_DESTROY=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_VEGAS=m
+CONFIG_TCP_CONG_NV=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_MD5SIG=y
+CONFIG_IPV6=m
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_IPV6_ILA=m
+CONFIG_IPV6_VTI=m
+CONFIG_IPV6_SIT_6RD=y
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IPV6_PIMSM_V2=y
+CONFIG_NETWORK_SECMARK=y
+CONFIG_NETWORK_PHY_TIMESTAMPING=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_CT_NETLINK_HELPER=m
+CONFIG_NETFILTER_NETLINK_GLUE_CT=y
+CONFIG_NF_TABLES=m
+CONFIG_NFT_NUMGEN=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_MASQ=m
+CONFIG_NFT_REDIR=m
+CONFIG_NFT_QUEUE=m
+CONFIG_NFT_QUOTA=m
+CONFIG_NFT_REJECT=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_HASH=m
+CONFIG_NETFILTER_XT_SET=m
+CONFIG_NETFILTER_XT_TARGET_AUDIT=m
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HMARK=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CGROUP=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_CPU=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPCOMP=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_NFACCT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_SET=m
+CONFIG_IP_SET_BITMAP_IP=m
+CONFIG_IP_SET_BITMAP_IPMAC=m
+CONFIG_IP_SET_BITMAP_PORT=m
+CONFIG_IP_SET_HASH_IP=m
+CONFIG_IP_SET_HASH_IPMARK=m
+CONFIG_IP_SET_HASH_IPPORT=m
+CONFIG_IP_SET_HASH_IPPORTIP=m
+CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_MAC=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
+CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
+CONFIG_IP_SET_HASH_NETPORT=m
+CONFIG_IP_SET_HASH_NETIFACE=m
+CONFIG_IP_SET_LIST_SET=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_IPV6=y
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_PROTO_SCTP=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_FO=m
+CONFIG_IP_VS_OVF=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_FTP=m
+CONFIG_IP_VS_PE_SIP=m
+CONFIG_NF_LOG_ARP=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_SYNPROXY=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_SYNPROXY=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
+CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NFT_BRIDGE_META=m
+CONFIG_NF_LOG_BRIDGE=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_EBT_T_NAT=m
+CONFIG_BRIDGE_EBT_802_3=m
+CONFIG_BRIDGE_EBT_AMONG=m
+CONFIG_BRIDGE_EBT_ARP=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_IP6=m
+CONFIG_BRIDGE_EBT_LIMIT=m
+CONFIG_BRIDGE_EBT_MARK=m
+CONFIG_BRIDGE_EBT_PKTTYPE=m
+CONFIG_BRIDGE_EBT_STP=m
+CONFIG_BRIDGE_EBT_VLAN=m
+CONFIG_BRIDGE_EBT_ARPREPLY=m
+CONFIG_BRIDGE_EBT_DNAT=m
+CONFIG_BRIDGE_EBT_MARK_T=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_SNAT=m
+CONFIG_BRIDGE_EBT_LOG=m
+CONFIG_BRIDGE_EBT_NFLOG=m
+CONFIG_IP_SCTP=m
+CONFIG_SCTP_COOKIE_HMAC_SHA1=y
+CONFIG_RDS=m
+CONFIG_RDS_TCP=m
+CONFIG_TIPC=m
+CONFIG_ATM=m
+CONFIG_ATM_CLIP=m
+CONFIG_ATM_LANE=m
+CONFIG_ATM_MPOA=m
+CONFIG_ATM_BR2684=m
+CONFIG_ATM_BR2684_IPFILTER=y
+CONFIG_L2TP=m
+CONFIG_L2TP_DEBUGFS=m
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
+CONFIG_BRIDGE=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_NET_DSA=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_VLAN_8021Q_MVRP=y
+CONFIG_IEEE802154=m
+CONFIG_IEEE802154_NL802154_EXPERIMENTAL=y
+CONFIG_MAC802154=m
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFB=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_FQ=m
+CONFIG_NET_SCH_HHF=m
+CONFIG_NET_SCH_PIE=m
+CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=m
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_CMP=m
+CONFIG_NET_EMATCH_NBYTE=m
+CONFIG_NET_EMATCH_U32=m
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_EMATCH_TEXT=m
+CONFIG_NET_EMATCH_IPSET=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_VLAN=m
+CONFIG_NET_ACT_BPF=m
+CONFIG_NET_ACT_CONNMARK=m
+CONFIG_NET_ACT_SKBMOD=m
+CONFIG_NET_ACT_IFE=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_IFE_SKBMARK=m
+CONFIG_NET_IFE_SKBPRIO=m
+CONFIG_NET_IFE_SKBTCINDEX=m
+CONFIG_DCB=y
+CONFIG_BATMAN_ADV=m
+# CONFIG_BATMAN_ADV_BATMAN_V is not set
+CONFIG_BATMAN_ADV_NC=y
+CONFIG_BATMAN_ADV_DEBUGFS=y
+CONFIG_OPENVSWITCH=m
+CONFIG_VSOCKETS=m
+CONFIG_VIRTIO_VSOCKETS=m
+CONFIG_NETLINK_DIAG=m
+CONFIG_HSR=m
+CONFIG_CGROUP_NET_PRIO=y
+CONFIG_BPF_JIT=y
+CONFIG_NET_PKTGEN=y
+CONFIG_BT=m
+CONFIG_BT_RFCOMM=m
+CONFIG_BT_BNEP=m
+CONFIG_BT_BNEP_MC_FILTER=y
+CONFIG_BT_BNEP_PROTO_FILTER=y
+CONFIG_BT_HIDP=m
+CONFIG_BT_HCIUART=m
+CONFIG_BT_HCIUART_H4=y
+CONFIG_CFG80211=m
+CONFIG_CFG80211_WEXT=y
+CONFIG_MAC80211=m
+CONFIG_MAC80211_MESH=y
+CONFIG_RFKILL=y
+CONFIG_RFKILL_GPIO=m
+CONFIG_NET_9P=m
+CONFIG_CEPH_LIB_USE_DNS_RESOLVER=y
+CONFIG_PCI=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_HOTPLUG_PCI_PCIE=y
+CONFIG_PCIE_ECRC=y
+CONFIG_PCIE_PTM=y
+CONFIG_PCI_REALLOC_ENABLE_AUTO=y
+CONFIG_PCI_STUB=m
+CONFIG_PCI_IOV=y
+CONFIG_PCI_PRI=y
+CONFIG_PCI_PASID=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_PCIE_BAIKAL_HOST=y
+CONFIG_PCIE_BAIKAL_EP=y
+CONFIG_PCI_ENDPOINT=y
+CONFIG_PCI_ENDPOINT_CONFIGFS=y
+CONFIG_PCI_EPF_TEST=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_SPI_NOR=y
+CONFIG_ZRAM=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_DRBD=y
+CONFIG_BLK_DEV_NBD=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_ATA_OVER_ETH=m
+CONFIG_VIRTIO_BLK=m
+CONFIG_BLK_DEV_RBD=y
+CONFIG_BLK_DEV_NVME=y
+CONFIG_TP_BMC=y
+CONFIG_EEPROM_AT24=y
+CONFIG_EEPROM_AT25=m
+CONFIG_RAID_ATTRS=m
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI_FC_ATTRS=m
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_ISCSI_TCP=m
+CONFIG_SCSI_HPSA=m
+CONFIG_SCSI_QLA_FC=m
+CONFIG_SCSI_VIRTIO=m
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_SATA_SIL24=m
+# CONFIG_ATA_SFF is not set
+CONFIG_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BCACHE=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_CACHE=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_RAID=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_UEVENT=y
+CONFIG_DM_SWITCH=m
+CONFIG_TARGET_CORE=m
+CONFIG_TCM_IBLOCK=m
+CONFIG_TCM_FILEIO=m
+CONFIG_TCM_PSCSI=m
+CONFIG_LOOPBACK_TARGET=m
+CONFIG_ISCSI_TARGET=m
+CONFIG_FIREWIRE=m
+CONFIG_FIREWIRE_OHCI=m
+CONFIG_FIREWIRE_SBP2=m
+CONFIG_FIREWIRE_NET=m
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_NET_FC=y
+CONFIG_NET_TEAM=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_IPVLAN=m
+CONFIG_VXLAN=m
+CONFIG_GENEVE=m
+CONFIG_GTP=m
+CONFIG_MACSEC=m
+CONFIG_NETCONSOLE=m
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_TUN=m
+CONFIG_VETH=m
+CONFIG_NLMON=m
+CONFIG_NET_VRF=m
+CONFIG_ATM_TCP=m
+CONFIG_AMD_XGBE=y
+CONFIG_BAIKAL_XGBE=y
+# CONFIG_NET_VENDOR_AURORA is not set
+CONFIG_E1000E=m
+CONFIG_IXGB=m
+CONFIG_IXGBE=m
+CONFIG_IXGBEVF=m
+CONFIG_STMMAC_ETH=y
+CONFIG_DWMAC_BAIKAL=y
+CONFIG_MDIO_BITBANG=y
+CONFIG_MDIO_GPIO=m
+CONFIG_MDIO_GPIO_BAIKAL=y
+CONFIG_88X2222_PHY=y
+CONFIG_MARVELL_PHY=y
+CONFIG_MICREL_PHY=y
+CONFIG_REALTEK_PHY=y
+CONFIG_USB_USBNET=y
+# CONFIG_USB_NET_ZAURUS is not set
+CONFIG_ATH10K=m
+CONFIG_ATH10K_PCI=m
+CONFIG_RTL8192CE=m
+CONFIG_RTL8192SE=m
+CONFIG_RTL8192DE=m
+CONFIG_RTL8723AE=m
+CONFIG_RTL8723BE=m
+CONFIG_RTL8188EE=m
+CONFIG_RTL8192EE=m
+CONFIG_RTL8821AE=m
+# CONFIG_IEEE802154_DRIVERS is not set
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_EVDEV=y
+# CONFIG_MOUSE_PS2 is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=2
+CONFIG_SERIAL_8250_RUNTIME_UARTS=2
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+# CONFIG_HW_RANDOM_CAVIUM is not set
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_BAIKAL_SMBUS=y
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+CONFIG_I2C_DESIGNWARE_SLAVE=y
+CONFIG_SPI=y
+CONFIG_SPI_BAIKAL_ESPI=y
+CONFIG_SPI_DESIGNWARE=y
+CONFIG_SPI_DW_MMIO=y
+CONFIG_SPI_SPIDEV=y
+CONFIG_GPIO_SYSFS=y
+CONFIG_GPIO_GENERIC_PLATFORM=y
+CONFIG_GPIO_PCF857X=y
+CONFIG_SENSORS_PVT=y
+CONFIG_SENSORS_PVT_I2C=y
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+# CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED is not set
+CONFIG_GPIO_WATCHDOG=m
+CONFIG_DW_WATCHDOG=y
+CONFIG_MEDIA_SUPPORT=y
+CONFIG_MEDIA_CAMERA_SUPPORT=y
+CONFIG_MEDIA_CONTROLLER=y
+CONFIG_VIDEO_V4L2_SUBDEV_API=y
+CONFIG_MEDIA_USB_SUPPORT=y
+CONFIG_USB_VIDEO_CLASS=m
+CONFIG_USB_M5602=m
+CONFIG_USB_STV06XX=m
+CONFIG_USB_GL860=m
+CONFIG_USB_GSPCA_BENQ=m
+CONFIG_USB_GSPCA_CONEX=m
+CONFIG_USB_GSPCA_CPIA1=m
+CONFIG_USB_GSPCA_DTCS033=m
+CONFIG_USB_GSPCA_ETOMS=m
+CONFIG_USB_GSPCA_FINEPIX=m
+CONFIG_USB_GSPCA_JEILINJ=m
+CONFIG_USB_GSPCA_JL2005BCD=m
+CONFIG_USB_GSPCA_KINECT=m
+CONFIG_USB_GSPCA_KONICA=m
+CONFIG_USB_GSPCA_MARS=m
+CONFIG_USB_GSPCA_MR97310A=m
+CONFIG_USB_GSPCA_NW80X=m
+CONFIG_USB_GSPCA_OV519=m
+CONFIG_USB_GSPCA_OV534=m
+CONFIG_USB_GSPCA_OV534_9=m
+CONFIG_USB_GSPCA_PAC207=m
+CONFIG_USB_GSPCA_PAC7302=m
+CONFIG_USB_GSPCA_PAC7311=m
+CONFIG_USB_GSPCA_SE401=m
+CONFIG_USB_GSPCA_SN9C2028=m
+CONFIG_USB_GSPCA_SN9C20X=m
+CONFIG_USB_GSPCA_SONIXB=m
+CONFIG_USB_GSPCA_SONIXJ=m
+CONFIG_USB_GSPCA_SPCA500=m
+CONFIG_USB_GSPCA_SPCA501=m
+CONFIG_USB_GSPCA_SPCA505=m
+CONFIG_USB_GSPCA_SPCA506=m
+CONFIG_USB_GSPCA_SPCA508=m
+CONFIG_USB_GSPCA_SPCA561=m
+CONFIG_USB_GSPCA_SPCA1528=m
+CONFIG_USB_GSPCA_SQ905=m
+CONFIG_USB_GSPCA_SQ905C=m
+CONFIG_USB_GSPCA_SQ930X=m
+CONFIG_USB_GSPCA_STK014=m
+CONFIG_USB_GSPCA_STK1135=m
+CONFIG_USB_GSPCA_STV0680=m
+CONFIG_USB_GSPCA_SUNPLUS=m
+CONFIG_USB_GSPCA_T613=m
+CONFIG_USB_GSPCA_TOPRO=m
+CONFIG_USB_GSPCA_TOUPTEK=m
+CONFIG_USB_GSPCA_TV8532=m
+CONFIG_USB_GSPCA_VC032X=m
+CONFIG_USB_GSPCA_VICAM=m
+CONFIG_USB_GSPCA_XIRLINK_CIT=m
+CONFIG_USB_GSPCA_ZC3XX=m
+CONFIG_USB_PWC=m
+CONFIG_VIDEO_CPIA2=m
+CONFIG_USB_ZR364XX=m
+CONFIG_USB_STKWEBCAM=m
+CONFIG_USB_S2255=m
+CONFIG_VIDEO_USBTV=m
+CONFIG_VIDEO_EM28XX=m
+CONFIG_VIDEO_EM28XX_V4L2=m
+CONFIG_VIDEO_EM28XX_ALSA=m
+CONFIG_DRM=y
+# CONFIG_DRM_I2C_CH7006 is not set
+# CONFIG_DRM_I2C_SIL164 is not set
+CONFIG_DRM_BAIKAL_VDU=y
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_AMDGPU=m
+CONFIG_DRM_NOUVEAU=m
+CONFIG_DRM_VGEM=m
+CONFIG_DRM_CIRRUS_QEMU=m
+CONFIG_DRM_QXL=m
+CONFIG_DRM_VIRTIO_GPU=m
+CONFIG_DRM_PANEL_LVDS=y
+CONFIG_DRM_BAIKAL_HDMI=y
+CONFIG_DRM_BAIKAL_HDMI_AHB_AUDIO=y
+CONFIG_DRM_PANFROST=m
+CONFIG_MALI_MIDGARD=m
+CONFIG_MALI_MIDGARD_ENABLE_TRACE=y
+CONFIG_MALI_DMA_FENCE=y
+# CONFIG_MALI_KUTF is not set
+CONFIG_FB_MODE_HELPERS=y
+CONFIG_FB_EFI=y
+CONFIG_LCD_CLASS_DEVICE=y
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
+CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_HRTIMER=m
+# CONFIG_SND_SUPPORT_OLD_API is not set
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQUENCER_OSS=m
+CONFIG_SND_DUMMY=m
+CONFIG_SND_HDA_BAIKAL_M=y
+CONFIG_SND_HDA_HWDEP=y
+CONFIG_SND_HDA_INPUT_BEEP=y
+CONFIG_SND_HDA_CODEC_REALTEK=y
+# CONFIG_SND_SPI is not set
+CONFIG_SND_USB_AUDIO=y
+# CONFIG_SND_FIREWIRE is not set
+CONFIG_SND_SOC=y
+CONFIG_SND_DESIGNWARE_I2S=y
+CONFIG_SND_DESIGNWARE_PCM=y
+CONFIG_SND_SOC_AC97_CODEC=m
+CONFIG_SND_SOC_SPDIF=m
+CONFIG_SND_SOC_TLV320AIC3X=y
+CONFIG_SND_SOC_NAU8822=y
+CONFIG_SND_SIMPLE_CARD=y
+CONFIG_HID_BATTERY_STRENGTH=y
+CONFIG_HIDRAW=y
+CONFIG_UHID=m
+CONFIG_HID_LOGITECH=m
+CONFIG_HID_LOGITECH_HIDPP=m
+CONFIG_USB_ULPI_BUS=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_XHCI_HCD=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_ROOT_HUB_TT=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_HCD_TEST_MODE=y
+CONFIG_USB_ACM=m
+CONFIG_USB_STORAGE=y
+CONFIG_USB_UAS=y
+CONFIG_USBIP_CORE=m
+CONFIG_USBIP_VHCI_HCD=m
+CONFIG_USBIP_HOST=m
+CONFIG_USB_DWC3=y
+CONFIG_USB_DWC3_ULPI=y
+# CONFIG_USB_DWC3_HAPS is not set
+CONFIG_USB_SERIAL=m
+CONFIG_USB_SERIAL_CH341=m
+CONFIG_USB_SERIAL_CP210X=m
+CONFIG_USB_SERIAL_FTDI_SIO=m
+CONFIG_USB_ULPI=y
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_OF_DWCMSHC=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_USER=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_EDAC=y
+CONFIG_EDAC_BAIKAL=m
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_ABEOZ9=y
+CONFIG_RTC_DRV_PCF2127=y
+CONFIG_DMADEVICES=y
+CONFIG_PL330_DMA=m
+CONFIG_VIRT_DRIVERS=y
+CONFIG_VIRTIO_PCI=m
+CONFIG_VIRTIO_BALLOON=m
+CONFIG_VIRTIO_INPUT=m
+CONFIG_VIRTIO_MMIO=m
+CONFIG_COMMON_CLK_VERSATILE=y
+# CONFIG_FSL_ERRATUM_A008585 is not set
+# CONFIG_HISILICON_ERRATUM_161010101 is not set
+# CONFIG_ARM64_ERRATUM_858921 is not set
+CONFIG_MAILBOX=y
+CONFIG_ARM_MHU=m
+CONFIG_ARM_SMMU=y
+CONFIG_ARM_SMMU_V3=y
+CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
+CONFIG_USB_PHY_BAIKAL=y
+CONFIG_ARM_CCN=y
+CONFIG_MUX_BAIKAL=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_FS_ENCRYPTION=y
+CONFIG_FANOTIFY=y
+CONFIG_AUTOFS4_FS=y
+CONFIG_FUSE_FS=m
+CONFIG_FSCACHE=y
+CONFIG_FSCACHE_STATS=y
+CONFIG_CACHEFILES=y
+CONFIG_ISO9660_FS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
+CONFIG_FAT_DEFAULT_UTF8=y
+CONFIG_NTFS_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROC_CHILDREN=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_EFIVAR_FS=y
+CONFIG_SQUASHFS=y
+CONFIG_SQUASHFS_FILE_DIRECT=y
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZ4=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_SQUASHFS_ZSTD=y
+CONFIG_PSTORE=y
+# CONFIG_PSTORE_DEFLATE_COMPRESS is not set
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_SWAP=y
+CONFIG_NFS_V4_1=y
+CONFIG_NFS_V4_2=y
+CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="y"
+CONFIG_NFS_V4_1_MIGRATION=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_BLOCKLAYOUT=y
+CONFIG_NFSD_SCSILAYOUT=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+CONFIG_SUNRPC_DEBUG=y
+CONFIG_9P_FS=m
+CONFIG_9P_FS_POSIX_ACL=y
+CONFIG_9P_FS_SECURITY=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_852=y
+CONFIG_NLS_CODEPAGE_866=y
+CONFIG_NLS_CODEPAGE_1251=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_5=y
+CONFIG_NLS_KOI8_R=y
+CONFIG_NLS_UTF8=y
+CONFIG_PERSISTENT_KEYRINGS=y
+CONFIG_BIG_KEYS=y
+CONFIG_ENCRYPTED_KEYS=y
+CONFIG_CRYPTO_CRYPTD=y
+CONFIG_CRYPTO_CRC32=y
+CONFIG_CRYPTO_MD4=y
+CONFIG_CRYPTO_ARC4=y
+CONFIG_CRYPTO_DEFLATE=y
+CONFIG_CRYPTO_LZ4=y
+CONFIG_CRYPTO_LZ4HC=y
+CONFIG_CRYPTO_ANSI_CPRNG=y
+CONFIG_CRYPTO_USER_API_HASH=y
+CONFIG_CRYPTO_USER_API_SKCIPHER=y
+CONFIG_CRYPTO_USER_API_AEAD=y
+CONFIG_DMA_CMA=y
+CONFIG_CMA_SIZE_MBYTES=256
+CONFIG_IRQ_POLL=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_PAGE_EXTENSION=y
+CONFIG_PAGE_POISONING=y
+CONFIG_TEST_USER_COPY=m
+CONFIG_TEST_BPF=m
+CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_STATIC_KEYS=m
+CONFIG_IO_STRICT_DEVMEM=y
diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c
index ed2f880b63b5f..f0f61bbfe178d 100644
--- a/drivers/acpi/pci_mcfg.c
+++ b/drivers/acpi/pci_mcfg.c
@@ -55,6 +55,30 @@ static struct mcfg_fixup mcfg_quirks[] = {
 	AL_ECAM("GRAVITON", 0, 6, &al_pcie_ops),
 	AL_ECAM("GRAVITON", 0, 7, &al_pcie_ops),
 
+#define BAIKAL_ECAM(table_id, rev, seg, ops) \
+	{ "BAIKAL", table_id, rev, seg, MCFG_BUS_ANY, ops }
+
+	/* Baikal-M Synopsys DesignWare PCIe */
+	BAIKAL_ECAM("BKLEMCFG", 1, 0, &baikal_m_pcie_ecam_ops),
+	BAIKAL_ECAM("BKLEMCFG", 1, 1, &baikal_m_pcie_ecam_ops),
+	BAIKAL_ECAM("BKLEMCFG", 1, 2, &baikal_m_pcie_ecam_ops),
+
+	/* Baikal-S Synopsys DesignWare PCIe */
+	BAIKAL_ECAM("BKLEMCFG", 2, 0, &baikal_s_pcie_ecam_ops),
+	BAIKAL_ECAM("BKLEMCFG", 2, 1, &baikal_s_pcie_ecam_ops),
+	BAIKAL_ECAM("BKLEMCFG", 2, 2, &baikal_s_pcie_ecam_ops),
+	BAIKAL_ECAM("BKLEMCFG", 2, 3, &baikal_s_pcie_ecam_ops),
+	BAIKAL_ECAM("BKLEMCFG", 2, 4, &baikal_s_pcie_ecam_ops),
+	BAIKAL_ECAM("BKLEMCFG", 2, 5, &baikal_s_pcie_ecam_ops),
+	BAIKAL_ECAM("BKLEMCFG", 2, 6, &baikal_s_pcie_ecam_ops),
+	BAIKAL_ECAM("BKLEMCFG", 2, 7, &baikal_s_pcie_ecam_ops),
+	BAIKAL_ECAM("BKLEMCFG", 2, 8, &baikal_s_pcie_ecam_ops),
+	BAIKAL_ECAM("BKLEMCFG", 2, 9, &baikal_s_pcie_ecam_ops),
+	BAIKAL_ECAM("BKLEMCFG", 2, 10, &baikal_s_pcie_ecam_ops),
+	BAIKAL_ECAM("BKLEMCFG", 2, 11, &baikal_s_pcie_ecam_ops),
+	BAIKAL_ECAM("BKLEMCFG", 2, 12, &baikal_s_pcie_ecam_ops),
+	BAIKAL_ECAM("BKLEMCFG", 2, 13, &baikal_s_pcie_ecam_ops),
+
 #define QCOM_ECAM32(seg) \
 	{ "QCOM  ", "QDF2432 ", 1, seg, MCFG_BUS_ANY, &pci_32b_ops }
 
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 0138fb14e6f88..6ed8cddd31ccd 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -72,6 +72,7 @@ obj-y					+= analogbits/
 obj-$(CONFIG_COMMON_CLK_AT91)		+= at91/
 obj-$(CONFIG_ARCH_ARTPEC)		+= axis/
 obj-$(CONFIG_ARC_PLAT_AXS10X)		+= axs10x/
+obj-$(CONFIG_ARCH_BAIKAL)		+= baikal/
 obj-y					+= bcm/
 obj-$(CONFIG_ARCH_BERLIN)		+= berlin/
 obj-$(CONFIG_ARCH_DAVINCI)		+= davinci/
diff --git a/drivers/clk/baikal/Makefile b/drivers/clk/baikal/Makefile
new file mode 100644
index 0000000000000..dc94047aa9131
--- /dev/null
+++ b/drivers/clk/baikal/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-y += clk-bm1000.o
+obj-y += clk-bs1000.o
diff --git a/drivers/clk/baikal/clk-bm1000.c b/drivers/clk/baikal/clk-bm1000.c
new file mode 100644
index 0000000000000..449851a0f6e67
--- /dev/null
+++ b/drivers/clk/baikal/clk-bm1000.c
@@ -0,0 +1,845 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2015-2023 Baikal Electronics, JSC
+ * Author: Ekaterina Skachko <ekaterina.skachko@baikalelectronics.ru>
+ */
+
+#include <linux/acpi.h>
+#include <linux/arm-smccc.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+
+#define BAIKAL_SMC_CMU_CMD	0x82000000
+#define CMU_PLL_SET_RATE	0
+#define CMU_PLL_GET_RATE	1
+#define CMU_PLL_ENABLE		2
+#define CMU_PLL_DISABLE		3
+#define CMU_PLL_ROUND_RATE	4
+#define CMU_PLL_IS_ENABLED	5
+#define CMU_CLK_CH_SET_RATE	6
+#define CMU_CLK_CH_GET_RATE	7
+#define CMU_CLK_CH_ENABLE	8
+#define CMU_CLK_CH_DISABLE	9
+#define CMU_CLK_CH_ROUND_RATE	10
+#define CMU_CLK_CH_IS_ENABLED	11
+
+struct baikal_clk_cmu {
+	struct clk_hw	hw;
+	u32		base;
+	unsigned int	parent;
+	const char	*name;
+	u32		is_clk_ch;
+};
+
+#define to_baikal_cmu(_hw) container_of(_hw, struct baikal_clk_cmu, hw)
+
+static int baikal_clk_enable(struct clk_hw *hw)
+{
+	struct arm_smccc_res res;
+	struct baikal_clk_cmu *pclk = to_baikal_cmu(hw);
+	u32 cmd;
+
+	if (pclk->is_clk_ch)
+		cmd = CMU_CLK_CH_ENABLE;
+	else
+		cmd = CMU_PLL_ENABLE;
+
+	/* If clock valid */
+	arm_smccc_smc(BAIKAL_SMC_CMU_CMD, pclk->base, cmd, 0,
+		      pclk->parent, 0, 0, 0, &res);
+
+	pr_debug("%s(%s, %s@0x%x): %s\n",
+		 __func__,
+		 pclk->name,
+		 pclk->is_clk_ch ? "clkch" : "pll",
+		 pclk->base,
+		 res.a0 ? "error" : "ok");
+
+	return res.a0;
+}
+
+static void baikal_clk_disable(struct clk_hw *hw)
+{
+	struct arm_smccc_res res;
+	struct baikal_clk_cmu *pclk = to_baikal_cmu(hw);
+	u32 cmd;
+
+	if (pclk->is_clk_ch)
+		cmd = CMU_CLK_CH_DISABLE;
+	else
+		cmd = CMU_PLL_DISABLE;
+
+	/* If clock valid */
+	arm_smccc_smc(BAIKAL_SMC_CMU_CMD, pclk->base, cmd, 0,
+		      pclk->parent, 0, 0, 0, &res);
+
+	pr_debug("%s(%s, %s@0x%x): %s\n",
+		 __func__,
+		 pclk->name,
+		 pclk->is_clk_ch ? "clkch" : "pll",
+		 pclk->base,
+		 res.a0 ? "error" : "ok");
+}
+
+static int baikal_clk_is_enabled(struct clk_hw *hw)
+{
+	struct arm_smccc_res res;
+	struct baikal_clk_cmu *pclk = to_baikal_cmu(hw);
+	u32 cmd;
+
+	if (pclk->is_clk_ch)
+		cmd = CMU_CLK_CH_IS_ENABLED;
+	else
+		cmd = CMU_PLL_IS_ENABLED;
+
+	/* If clock valid */
+	arm_smccc_smc(BAIKAL_SMC_CMU_CMD, pclk->base, cmd, 0,
+		      pclk->parent, 0, 0, 0, &res);
+
+	pr_debug("%s(%s, %s@0x%x): %s\n",
+		 __func__,
+		 pclk->name,
+		 pclk->is_clk_ch ? "clkch" : "pll",
+		 pclk->base,
+		 res.a0 ? "true" : "false");
+
+	return res.a0;
+}
+
+static unsigned long baikal_clk_recalc_rate(struct clk_hw *hw,
+					    unsigned long parent_rate)
+{
+	struct arm_smccc_res res;
+	struct baikal_clk_cmu *pclk = to_baikal_cmu(hw);
+	u32 cmd;
+	unsigned long parent;
+
+	if (pclk->is_clk_ch) {
+		cmd = CMU_CLK_CH_GET_RATE;
+		parent = pclk->parent;
+	} else {
+		cmd = CMU_PLL_GET_RATE;
+		parent = parent_rate;
+	}
+
+	/* If clock valid */
+	arm_smccc_smc(BAIKAL_SMC_CMU_CMD, pclk->base, cmd, 0,
+		      parent, 0, 0, 0, &res);
+
+	pr_debug("%s(%s, %s@0x%x): %ld Hz\n",
+		 __func__,
+		 pclk->name,
+		 pclk->is_clk_ch ? "clkch" : "pll",
+		 pclk->base,
+		 res.a0);
+
+	/* Return actual freq */
+	return res.a0;
+}
+
+static int baikal_clk_set_rate(struct clk_hw *hw, unsigned long rate,
+			       unsigned long parent_rate)
+{
+	struct arm_smccc_res res;
+	struct baikal_clk_cmu *pclk = to_baikal_cmu(hw);
+	u32 cmd;
+	unsigned long parent;
+
+	if (pclk->is_clk_ch) {
+		cmd = CMU_CLK_CH_SET_RATE;
+		parent = pclk->parent;
+	} else {
+		cmd = CMU_PLL_SET_RATE;
+		parent = parent_rate;
+	}
+
+	arm_smccc_smc(BAIKAL_SMC_CMU_CMD, pclk->base, cmd, rate,
+		      parent, 0, 0, 0, &res);
+
+	pr_debug("%s(%s, %s@0x%x, %ld Hz): %s\n",
+		 __func__,
+		 pclk->name,
+		 pclk->is_clk_ch ? "clkch" : "pll",
+		 pclk->base,
+		 rate,
+		 res.a0 ? "error" : "ok");
+
+	return res.a0;
+}
+
+static long baikal_clk_round_rate(struct clk_hw *hw, unsigned long rate,
+				  unsigned long *prate)
+{
+	struct arm_smccc_res res;
+	struct baikal_clk_cmu *pclk = to_baikal_cmu(hw);
+	u32 cmd;
+	unsigned long parent;
+
+	if (pclk->is_clk_ch) {
+		cmd = CMU_CLK_CH_ROUND_RATE;
+		parent = pclk->parent;
+	} else {
+		cmd = CMU_PLL_ROUND_RATE;
+		parent = *prate;
+	}
+
+	/* If clock valid */
+	arm_smccc_smc(BAIKAL_SMC_CMU_CMD, pclk->base, cmd, rate,
+		      parent, 0, 0, 0, &res);
+
+	pr_debug("%s(%s, %s@0x%x): %ld Hz\n",
+		 __func__,
+		 pclk->name,
+		 pclk->is_clk_ch ? "clkch" : "pll",
+		 pclk->base,
+		 res.a0);
+
+	/* Return actual freq */
+	return res.a0;
+}
+
+static const struct clk_ops baikal_clk_ops = {
+	.enable	     = baikal_clk_enable,
+	.disable     = baikal_clk_disable,
+	.is_enabled  = baikal_clk_is_enabled,
+	.recalc_rate = baikal_clk_recalc_rate,
+	.set_rate    = baikal_clk_set_rate,
+	.round_rate  = baikal_clk_round_rate,
+};
+
+static int baikal_clk_probe(struct platform_device *pdev)
+{
+	struct clk_init_data init;
+	struct clk_init_data *init_ch;
+	struct baikal_clk_cmu *cmu;
+	struct baikal_clk_cmu **cmu_ch;
+	struct device_node *node = pdev->dev.of_node;
+
+	struct clk *clk;
+	struct clk_onecell_data *clk_ch;
+
+	int i = 0, number, rc;
+	u32 index;
+	u64 base;
+	struct property *prop;
+	const __be32 *p;
+	const char *clk_ch_name;
+	const char *parent_name;
+
+	cmu = kmalloc(sizeof(*cmu), GFP_KERNEL);
+	if (!cmu)
+		return -ENOMEM;
+
+	of_property_read_string(node, "clock-output-names", &cmu->name);
+	of_property_read_u32(node, "clock-frequency", &cmu->parent);
+	rc = of_property_read_u64(node, "reg", &base);
+	if (rc)
+		return rc;
+
+	cmu->base = base;
+
+	parent_name = of_clk_get_parent_name(node, 0);
+
+	/* Setup clock init structure */
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+	init.name = cmu->name;
+	init.ops = &baikal_clk_ops;
+	init.flags = CLK_IGNORE_UNUSED;
+
+	cmu->hw.init = &init;
+	cmu->is_clk_ch = 0;
+
+	pr_debug("%s: add %s, parent %s\n",
+		 __func__, cmu->name, parent_name ? parent_name : "null");
+
+	clk = clk_register(NULL, &cmu->hw);
+	if (IS_ERR(clk)) {
+		pr_err("%s: could not register clk %s\n", __func__, cmu->name);
+		return -ENOMEM;
+	}
+
+	/* Register the clock for lookup */
+	rc = clk_register_clkdev(clk, cmu->name, NULL);
+	if (rc != 0) {
+		pr_err("%s: could not register lookup clk %s\n",
+		       __func__, cmu->name);
+	}
+
+	/* FIXME: we probably shouldn't enable it here */
+	clk_prepare_enable(clk);
+
+	number = of_property_count_u32_elems(node, "clock-indices");
+
+	if (number > 0) {
+		clk_ch = kmalloc(sizeof(*clk_ch), GFP_KERNEL);
+		if (!clk_ch)
+			return -ENOMEM;
+
+		/* Get the last index to find out max number of children*/
+		of_property_for_each_u32(node, "clock-indices", prop, p, index) {
+			;
+		}
+
+		clk_ch->clks = kcalloc(index + 1, sizeof(struct clk *), GFP_KERNEL);
+		clk_ch->clk_num = index + 1;
+		cmu_ch = kcalloc((index + 1), sizeof(struct baikal_clk_cmu *), GFP_KERNEL);
+		if (!cmu_ch) {
+			kfree(clk_ch);
+			return -ENOMEM;
+		}
+
+		init_ch = kcalloc((number + 1), sizeof(struct clk_init_data), GFP_KERNEL);
+		if (!init_ch) {
+			kfree(cmu_ch);
+			kfree(clk_ch);
+			return -ENOMEM;
+		}
+
+		of_property_for_each_u32(node, "clock-indices", prop, p, index) {
+			of_property_read_string_index(node, "clock-names",
+						      i, &clk_ch_name);
+			pr_debug("%s: clkch <%s>, index %d, i %d\n", __func__, clk_ch_name, index, i);
+			init_ch[i].parent_names = &cmu->name;
+			init_ch[i].num_parents = 1;
+			init_ch[i].name = clk_ch_name;
+			init_ch[i].ops = &baikal_clk_ops;
+			init_ch[i].flags = CLK_IGNORE_UNUSED;
+
+			cmu_ch[index] = kmalloc(sizeof(*cmu_ch[index]), GFP_KERNEL);
+			cmu_ch[index]->name = clk_ch_name;
+			cmu_ch[index]->base = index;
+			cmu_ch[index]->parent = cmu->base;
+			cmu_ch[index]->is_clk_ch = 1;
+			cmu_ch[index]->hw.init = &init_ch[i];
+			clk_ch->clks[index] = clk_register(NULL, &cmu_ch[index]->hw);
+
+			if (IS_ERR(clk_ch->clks[index])) {
+				pr_err("%s: could not register clk %s\n",
+				       __func__, clk_ch_name);
+			}
+
+			/* Register the clock for lookup */
+			rc = clk_register_clkdev(clk_ch->clks[index], clk_ch_name, NULL);
+			if (rc != 0) {
+				pr_err("%s: could not register lookup clk %s\n",
+				       __func__, clk_ch_name);
+			}
+
+			/* FIXME: we probably shouldn't enable it here */
+			clk_prepare_enable(clk_ch->clks[index]);
+			i++;
+		}
+
+		return of_clk_add_provider(pdev->dev.of_node, of_clk_src_onecell_get, clk_ch);
+	}
+
+	return of_clk_add_provider(pdev->dev.of_node, of_clk_src_simple_get, clk);
+}
+
+static int baikal_clk_remove(struct platform_device *pdev)
+{
+	of_clk_del_provider(pdev->dev.of_node);
+	return 0;
+}
+
+#ifdef CONFIG_ACPI
+const char *baikal_acpi_clk_osc25_str[] = { "osc25" };
+const char *baikal_acpi_clk_osc27_str[] = { "osc27" };
+
+static struct clk *baikal_acpi_clk_osc25;
+static struct clk *baikal_acpi_clk_osc27;
+
+#define BAIKAL_CMU_CLK_CH	0x0
+#define BAIKAL_FIXED_CLK	0x1
+#define BAIKAL_FIXED_FACTOR_CLK	0x2
+#define BAIKAL_CMU_CLK          0xffffffffffffffff
+
+struct baikal_acpi_clk_data {
+	struct clk *cmu_clk;
+	struct clk_lookup *cmu_clk_l;
+	struct clk_lookup **cmu_clk_refs_l;
+	struct clk **clk;
+	struct clk_lookup **clk_l;
+	u8 *type;
+	unsigned int clk_num;
+	unsigned int cmu_clk_refs_num;
+};
+
+static int baikal_acpi_clk_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct acpi_device *ref_dev, *adev = to_acpi_device_node(pdev->dev.fwnode);
+	struct clk_init_data init, *init_ch;
+	struct baikal_clk_cmu *cmu, *cmu_ch;
+	struct baikal_acpi_clk_data *clk_data = NULL;
+	union acpi_object *package, *element;
+	acpi_status status;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	int osc27, i, ret = 0;
+	char *str, *str2;
+
+	cmu = devm_kzalloc(dev, sizeof(*cmu), GFP_KERNEL);
+	if (!cmu)
+		return -ENOMEM;
+
+	status = acpi_evaluate_object_typed(adev->handle, "PROP", NULL, &buffer, ACPI_TYPE_PACKAGE);
+	if (ACPI_FAILURE(status)) {
+		dev_err(dev, "failed to get PROP data\n");
+		return -ENODEV;
+	}
+
+	package = buffer.pointer;
+	if (package->package.count != 4) {
+		dev_err(dev, "invalid PROP data\n");
+		ret = -EINVAL;
+		goto ret;
+	}
+
+	element = &package->package.elements[0];
+	if (element->type != ACPI_TYPE_INTEGER) {
+		dev_err(dev, "failed to get CMU id\n");
+		ret = -EINVAL;
+		goto ret;
+	}
+
+	cmu->base = element->integer.value;
+
+	element = &package->package.elements[1];
+	if (element->type != ACPI_TYPE_STRING) {
+		dev_err(dev, "failed to get CMU clock name\n");
+		ret = -EINVAL;
+		goto ret;
+	}
+
+	str = devm_kzalloc(dev, element->string.length + 1, GFP_KERNEL);
+	if (!str) {
+		ret = -ENOMEM;
+		goto ret;
+	}
+
+	memcpy(str, element->string.pointer, element->string.length);
+	cmu->name = str;
+
+	element = &package->package.elements[2];
+	if (element->type != ACPI_TYPE_INTEGER) {
+		dev_err(dev, "failed to get CMU frequency\n");
+		ret = -EINVAL;
+		goto ret;
+	}
+
+	cmu->parent = element->integer.value;
+
+	element = &package->package.elements[3];
+	if (element->type != ACPI_TYPE_INTEGER) {
+		dev_err(dev, "failed to get CMU osc type\n");
+		ret = -EINVAL;
+		goto ret;
+	}
+
+	osc27 = element->integer.value ? 1 : 0;
+
+	acpi_os_free(buffer.pointer);
+	buffer.length = ACPI_ALLOCATE_BUFFER;
+	buffer.pointer = NULL;
+
+	init.parent_names = osc27 ? baikal_acpi_clk_osc27_str : baikal_acpi_clk_osc25_str;
+	init.num_parents = 1;
+	init.name = cmu->name;
+	init.ops = &baikal_clk_ops;
+	init.flags = CLK_IGNORE_UNUSED;
+
+	cmu->hw.init = &init;
+	cmu->is_clk_ch = 0;
+
+	clk_data = devm_kzalloc(dev, sizeof(*clk_data), GFP_KERNEL);
+	if (!clk_data)
+		return -ENOMEM;
+
+	clk_data->cmu_clk = clk_register(NULL, &cmu->hw);
+	if (IS_ERR(clk_data->cmu_clk)) {
+		dev_err(dev, "failed to register CMU clock\n");
+		return PTR_ERR(clk_data->cmu_clk);
+	}
+
+	clk_data->cmu_clk_l = clkdev_create(clk_data->cmu_clk, cmu->name, NULL);
+	if (!clk_data->cmu_clk_l) {
+		dev_err(dev, "failed to register CMU clock lookup\n");
+		clk_unregister(clk_data->cmu_clk);
+		return -ENOMEM;
+	}
+
+	clk_prepare_enable(clk_data->cmu_clk);
+
+	platform_set_drvdata(pdev, clk_data);
+
+	/* CPU clock */
+	status = acpi_evaluate_object_typed(adev->handle, "CMU", NULL, &buffer, ACPI_TYPE_PACKAGE);
+	if (ACPI_FAILURE(status)) {
+		buffer.pointer = NULL;
+		goto clk_channels;
+	}
+
+	package = buffer.pointer;
+	if (!package->package.count || package->package.count % 2) {
+		dev_err(dev, "invalid CMU data\n");
+		ret = -EINVAL;
+		goto ret;
+	}
+
+	clk_data->cmu_clk_refs_num = package->package.count >> 1;
+	clk_data->cmu_clk_refs_l = devm_kzalloc(dev,
+						clk_data->cmu_clk_refs_num * sizeof(struct clk_lookup *),
+						GFP_KERNEL);
+	if (!clk_data->cmu_clk_refs_l) {
+		ret = -ENOMEM;
+		goto ret;
+	}
+
+	for (i = 0; i < clk_data->cmu_clk_refs_num; ++i) {
+		ref_dev = NULL;
+
+		element = &package->package.elements[2 * i];
+		if (element->type == ACPI_TYPE_LOCAL_REFERENCE && element->reference.handle) {
+			if (acpi_bus_get_device(element->reference.handle, &ref_dev))
+				ref_dev = NULL;
+		}
+
+		element = &package->package.elements[2 * i + 1];
+		if (element->type == ACPI_TYPE_STRING) {
+			str2 = devm_kzalloc(dev, element->string.length + 1, GFP_KERNEL);
+			if (str2)
+				memcpy(str2, element->string.pointer, element->string.length);
+		} else {
+			str2 = NULL;
+		}
+
+		if (ref_dev && acpi_get_first_physical_node(ref_dev))
+			clk_data->cmu_clk_refs_l[i] =
+				clkdev_create(clk_data->cmu_clk, str2, "%s",
+					      dev_name(acpi_get_first_physical_node(ref_dev)));
+		else
+			clk_data->cmu_clk_refs_l[i] = clkdev_create(clk_data->cmu_clk, str2, NULL);
+	}
+
+	acpi_os_free(buffer.pointer);
+	buffer.length = ACPI_ALLOCATE_BUFFER;
+	buffer.pointer = NULL;
+
+clk_channels:
+	status = acpi_evaluate_object_typed(adev->handle, "CLKS", NULL, &buffer, ACPI_TYPE_PACKAGE);
+	if (ACPI_FAILURE(status)) {
+		buffer.pointer = NULL;
+		clk_data = NULL;
+		goto ret;
+	}
+
+	package = buffer.pointer;
+	if (!package->package.count || package->package.count % 4) {
+		dev_err(dev, "invalid CLKS data\n");
+		ret = -EINVAL;
+		goto ret;
+	}
+
+	clk_data->clk_num = package->package.count >> 2;
+	clk_data->clk = devm_kzalloc(dev, clk_data->clk_num * sizeof(struct clk *), GFP_KERNEL);
+	if (!clk_data->clk) {
+		ret = -ENOMEM;
+		goto ret;
+	}
+
+	clk_data->clk_l = devm_kzalloc(dev, clk_data->clk_num * sizeof(struct clk_lookup *),
+				       GFP_KERNEL);
+	if (!clk_data->clk_l) {
+		ret = -ENOMEM;
+		goto ret;
+	}
+
+	clk_data->type = devm_kzalloc(dev, clk_data->clk_num * sizeof(u8), GFP_KERNEL);
+	if (!clk_data->type) {
+		ret = -ENOMEM;
+		goto ret;
+	}
+
+	init_ch = devm_kzalloc(dev, clk_data->clk_num * sizeof(struct clk_init_data), GFP_KERNEL);
+	if (!init_ch) {
+		ret = -ENOMEM;
+		goto ret;
+	}
+
+	cmu_ch = devm_kzalloc(dev, clk_data->clk_num * sizeof(struct baikal_clk_cmu), GFP_KERNEL);
+	if (!cmu_ch) {
+		ret = -ENOMEM;
+		goto ret;
+	}
+
+	for (i = 0; i < clk_data->clk_num; ++i) {
+		ref_dev = NULL;
+
+		element = &package->package.elements[4 * i];
+		if (element->type == ACPI_TYPE_LOCAL_REFERENCE && element->reference.handle) {
+			if (acpi_bus_get_device(element->reference.handle, &ref_dev))
+				ref_dev = NULL;
+		}
+
+		element = &package->package.elements[4 * i + 1];
+		if (element->type == ACPI_TYPE_STRING) {
+			str = devm_kzalloc(dev, element->string.length + 1, GFP_KERNEL);
+			if (str)
+				memcpy(str, element->string.pointer, element->string.length);
+		} else {
+			dev_err(dev, "failed to process clock device name #%i\n", i);
+			continue;
+		}
+
+		element = &package->package.elements[4 * i + 2];
+		if (element->type == ACPI_TYPE_INTEGER) {
+			if (element->integer.value != BAIKAL_CMU_CLK) {
+				init_ch[i].parent_names = &cmu->name;
+				init_ch[i].num_parents = 1;
+				init_ch[i].name = str;
+				init_ch[i].ops = &baikal_clk_ops;
+				init_ch[i].flags = CLK_IGNORE_UNUSED;
+
+				cmu_ch[i].name = str;
+				cmu_ch[i].base = element->integer.value;
+				cmu_ch[i].parent = cmu->base;
+				cmu_ch[i].is_clk_ch = 1;
+				cmu_ch[i].hw.init = &init_ch[i];
+
+				clk_data->type[i] = BAIKAL_CMU_CLK_CH;
+				clk_data->clk[i] = clk_register(ref_dev ? &ref_dev->dev : NULL,
+								&cmu_ch[i].hw);
+				if (IS_ERR(clk_data->clk[i])) {
+					dev_err(dev, "failed to register CMU channel clock #%i\n", i);
+					clk_data->clk[i] = NULL;
+					continue;
+				}
+			}
+		} else if (element->type == ACPI_TYPE_PACKAGE &&
+			   element->package.count == 3 &&
+			   element->package.elements[0].type == ACPI_TYPE_INTEGER &&
+			   element->package.elements[1].type == ACPI_TYPE_INTEGER &&
+			   element->package.elements[2].type == ACPI_TYPE_INTEGER) {
+			/* Fixed clock */
+			struct clk_hw *hw;
+			u64 type = element->package.elements[0].integer.value;
+			u64 val1 = element->package.elements[1].integer.value;
+			u64 val2 = element->package.elements[2].integer.value;
+
+			switch (type) {
+			case BAIKAL_FIXED_CLK:
+				clk_data->type[i] = BAIKAL_FIXED_CLK;
+				hw = clk_hw_register_fixed_rate_with_accuracy(ref_dev ?
+									      &ref_dev->dev : NULL,
+									      str, NULL, 0,
+									      val1, val2);
+				if (IS_ERR(hw)) {
+					dev_err(dev, "failed to register fixed clock #%i\n", i);
+					continue;
+				}
+				clk_data->clk[i] = hw->clk;
+				break;
+			case BAIKAL_FIXED_FACTOR_CLK:
+				clk_data->type[i] = BAIKAL_FIXED_FACTOR_CLK;
+				hw = clk_hw_register_fixed_factor(ref_dev ? &ref_dev->dev : NULL,
+								  str, cmu->name, 0, val1, val2);
+				if (IS_ERR(hw)) {
+					dev_err(dev, "failed to register fixed-factor clock #%i\n", i);
+					continue;
+				}
+				clk_data->clk[i] = hw->clk;
+				break;
+			default:
+				dev_err(dev, "failed to create clock #%i\n", i);
+				continue;
+			}
+		} else {
+			dev_err(dev, "failed to process clock device id #%i\n", i);
+			continue;
+		}
+
+		element = &package->package.elements[4 * i + 3];
+		if (element->type == ACPI_TYPE_STRING) {
+			str2 = devm_kzalloc(dev, element->string.length + 1, GFP_KERNEL);
+			if (str2)
+				memcpy(str2, element->string.pointer, element->string.length);
+		} else {
+			str2 = NULL;
+		}
+
+		if (ref_dev || str2) {
+			if (!clk_data->clk[i]) {
+				if (ref_dev)
+					clk_data->clk_l[i] = clkdev_create(clk_data->cmu_clk,
+									   str2, "%s",
+									   dev_name(&ref_dev->dev));
+				else
+					clk_data->clk_l[i] = clkdev_create(clk_data->cmu_clk,
+									   str2, NULL);
+			} else {
+				if (ref_dev)
+					clk_data->clk_l[i] = clkdev_create(clk_data->clk[i],
+									   str2, "%s",
+									   dev_name(&ref_dev->dev));
+				else
+					clk_data->clk_l[i] = clkdev_create(clk_data->clk[i],
+									   str2, NULL);
+			}
+
+			if (!clk_data->clk_l[i]) {
+				dev_err(dev, "failed to register clock lookup #%i\n", i);
+				clk_unregister(clk_data->clk[i]);
+				clk_data->clk[i] = NULL;
+				continue;
+			}
+		}
+
+		clk_prepare_enable(clk_data->clk[i]);
+	}
+
+	clk_data = NULL;
+
+ret:
+	if (buffer.pointer)
+		acpi_os_free(buffer.pointer);
+
+	if (clk_data) {
+		clk_disable_unprepare(clk_data->cmu_clk);
+		clkdev_drop(clk_data->cmu_clk_l);
+		clk_unregister(clk_data->cmu_clk);
+	}
+
+	return ret;
+}
+
+static int baikal_acpi_clk_remove(struct platform_device *pdev)
+{
+	struct baikal_acpi_clk_data *clk_data = platform_get_drvdata(pdev);
+	int i;
+
+	if (clk_data) {
+		clk_disable_unprepare(clk_data->cmu_clk);
+		clkdev_drop(clk_data->cmu_clk_l);
+		clk_unregister(clk_data->cmu_clk);
+
+		for (i = 0; i < clk_data->cmu_clk_refs_num; ++i) {
+			if (clk_data->cmu_clk_refs_l[i])
+				clkdev_drop(clk_data->cmu_clk_refs_l[i]);
+		}
+
+		for (i = 0; i < clk_data->clk_num; ++i) {
+			if (clk_data->clk_l[i])
+				clkdev_drop(clk_data->clk_l[i]);
+			if (clk_data->clk[i]) {
+				switch (clk_data->type[i]) {
+				case BAIKAL_FIXED_CLK:
+					clk_unregister_fixed_rate(clk_data->clk[i]);
+					break;
+				case BAIKAL_FIXED_FACTOR_CLK:
+					clk_unregister_fixed_factor(clk_data->clk[i]);
+					break;
+				default:
+					clk_unregister(clk_data->clk[i]);
+					break;
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
+static const struct acpi_device_id baikal_acpi_clk_device_ids[] = {
+	{ "BKLE0001" },
+	{ }
+};
+
+static struct platform_driver baikal_acpi_clk_driver = {
+	.probe		= baikal_acpi_clk_probe,
+	.remove		= baikal_acpi_clk_remove,
+	.driver		= {
+		.name	= "bm1000-cmu-acpi",
+		.acpi_match_table = ACPI_PTR(baikal_acpi_clk_device_ids)
+	}
+};
+
+static int __init bm1000_cmu_driver_acpi_init(void)
+{
+	if (!acpi_disabled) {
+		struct clk_lookup *baikal_acpi_clk_lookup_osc25;
+		struct clk_lookup *baikal_acpi_clk_lookup_osc27;
+
+		baikal_acpi_clk_osc25 = clk_register_fixed_rate(NULL, baikal_acpi_clk_osc25_str[0],
+								NULL, 0, 25000000);
+		if (IS_ERR(baikal_acpi_clk_osc25)) {
+			pr_err("%s: failed to register osc25 clock\n", __func__);
+			return PTR_ERR(baikal_acpi_clk_osc25);
+		}
+
+		baikal_acpi_clk_osc27 = clk_register_fixed_rate(NULL, baikal_acpi_clk_osc27_str[0],
+								NULL, 0, 27000000);
+		if (IS_ERR(baikal_acpi_clk_osc27)) {
+			clk_unregister_fixed_rate(baikal_acpi_clk_osc25);
+			pr_err("%s: failed to register osc27 clock\n", __func__);
+			return PTR_ERR(baikal_acpi_clk_osc27);
+		}
+
+		baikal_acpi_clk_lookup_osc25 = clkdev_create(baikal_acpi_clk_osc25, NULL, "%s",
+							     baikal_acpi_clk_osc25_str[0]);
+		if (!baikal_acpi_clk_lookup_osc25) {
+			clk_unregister_fixed_rate(baikal_acpi_clk_osc27);
+			clk_unregister_fixed_rate(baikal_acpi_clk_osc25);
+			pr_err("%s: failed to register osc25 clock lookup\n", __func__);
+			return -ENOMEM;
+		}
+
+		baikal_acpi_clk_lookup_osc27 = clkdev_create(baikal_acpi_clk_osc27, NULL, "%s",
+							     baikal_acpi_clk_osc27_str[0]);
+		if (!baikal_acpi_clk_lookup_osc27) {
+			clkdev_drop(baikal_acpi_clk_lookup_osc25);
+			clk_unregister_fixed_rate(baikal_acpi_clk_osc27);
+			clk_unregister_fixed_rate(baikal_acpi_clk_osc25);
+			pr_err("%s: failed to register osc27 clock lookup\n", __func__);
+			return -ENOMEM;
+		}
+
+		clk_prepare_enable(baikal_acpi_clk_osc25);
+		clk_prepare_enable(baikal_acpi_clk_osc27);
+
+		return platform_driver_register(&baikal_acpi_clk_driver);
+	}
+
+	return 0;
+}
+
+device_initcall(bm1000_cmu_driver_acpi_init);
+#endif
+
+static const struct of_device_id baikal_clk_of_match[] = {
+	{ .compatible = "baikal,bm1000-cmu" },
+	{ }
+};
+
+static struct platform_driver bm1000_cmu_driver = {
+	.probe	= baikal_clk_probe,
+	.remove	= baikal_clk_remove,
+	.driver	= {
+		.name = "bm1000-cmu",
+		.of_match_table = baikal_clk_of_match
+	}
+};
+module_platform_driver(bm1000_cmu_driver);
+
+MODULE_DESCRIPTION("Baikal BE-M1000 clock driver");
+MODULE_AUTHOR("Ekaterina Skachko <ekaterina.skachko@baikalelectronics.ru>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:bm1000-cmu");
diff --git a/drivers/clk/baikal/clk-bs1000.c b/drivers/clk/baikal/clk-bs1000.c
new file mode 100644
index 0000000000000..82be1db430a81
--- /dev/null
+++ b/drivers/clk/baikal/clk-bs1000.c
@@ -0,0 +1,500 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2021-2023 Baikal Electronics, JSC
+ * Author: Ekaterina Skachko <ekaterina.skachko@baikalelectronics.ru>
+ */
+
+#include <linux/acpi.h>
+#include <linux/arm-smccc.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+
+#define BAIKAL_SMC_CLK_ROUND		0x82000400
+#define BAIKAL_SMC_CLK_SET		0x82000401
+#define BAIKAL_SMC_CLK_GET		0x82000402
+#define BAIKAL_SMC_CLK_ENABLE		0x82000403
+#define BAIKAL_SMC_CLK_DISABLE		0x82000404
+#define BAIKAL_SMC_CLK_IS_ENABLED	0x82000405
+
+struct baikal_clk {
+	struct clk_hw	hw;
+	u32		base;
+};
+
+#define to_baikal_clk(_hw) container_of(_hw, struct baikal_clk, hw)
+
+static int baikal_clk_enable(struct clk_hw *hw)
+{
+	struct baikal_clk *clk = to_baikal_clk(hw);
+	struct arm_smccc_res res;
+
+	arm_smccc_smc(BAIKAL_SMC_CLK_ENABLE, clk->base, 0, 0, 0, 0, 0, 0, &res);
+	return res.a0;
+}
+
+static void baikal_clk_disable(struct clk_hw *hw)
+{
+	struct baikal_clk *clk = to_baikal_clk(hw);
+	struct arm_smccc_res res;
+
+	arm_smccc_smc(BAIKAL_SMC_CLK_DISABLE, clk->base, 0, 0, 0, 0, 0, 0, &res);
+}
+
+static int baikal_clk_is_enabled(struct clk_hw *hw)
+{
+	struct baikal_clk *clk = to_baikal_clk(hw);
+	struct arm_smccc_res res;
+
+	arm_smccc_smc(BAIKAL_SMC_CLK_IS_ENABLED, clk->base, 0, 0, 0, 0, 0, 0, &res);
+	return res.a0;
+}
+
+static unsigned long baikal_clk_recalc_rate(struct clk_hw *hw,
+					    unsigned long parent_rate)
+{
+	struct baikal_clk *clk = to_baikal_clk(hw);
+	struct arm_smccc_res res;
+
+	arm_smccc_smc(BAIKAL_SMC_CLK_GET, clk->base, 0, 0, 0, 0, 0, 0, &res);
+	return res.a0;
+}
+
+static int baikal_clk_set_rate(struct clk_hw *hw, unsigned long rate,
+			       unsigned long parent_rate)
+{
+	struct baikal_clk *clk = to_baikal_clk(hw);
+	struct arm_smccc_res res;
+
+	arm_smccc_smc(BAIKAL_SMC_CLK_SET, clk->base, rate, 0, 0, 0, 0, 0, &res);
+	return res.a0;
+}
+
+static long baikal_clk_round_rate(struct clk_hw *hw, unsigned long rate,
+				  unsigned long *prate)
+{
+	struct baikal_clk *clk = to_baikal_clk(hw);
+	struct arm_smccc_res res;
+
+	arm_smccc_smc(BAIKAL_SMC_CLK_ROUND, clk->base, rate, 0, 0, 0, 0, 0, &res);
+	return res.a0;
+}
+
+static const struct clk_ops baikal_clk_ops = {
+	.enable	     = baikal_clk_enable,
+	.disable     = baikal_clk_disable,
+	.is_enabled  = baikal_clk_is_enabled,
+	.recalc_rate = baikal_clk_recalc_rate,
+	.set_rate    = baikal_clk_set_rate,
+	.round_rate  = baikal_clk_round_rate,
+};
+
+static int baikal_clk_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct clk_init_data init;
+	struct baikal_clk *cmu;
+	struct clk_onecell_data *clk_data;
+	const char *clk_name;
+	struct property *prop;
+	const __be32 *p;
+	int clk_index;
+	int clk_index_max;
+	int clk_index_cnt;
+	int clk_name_cnt;
+	int clk_cnt;
+	int i;
+	u32 base;
+	struct clk *clk;
+	int ret;
+	int multi;
+
+	ret = of_property_read_u32(node, "reg", &base);
+	if (ret)
+		base = 0;
+
+	clk_index_cnt = of_property_count_u32_elems(node, "clock-indices");
+	clk_name_cnt = of_property_count_strings(node, "clock-output-names");
+	clk_cnt = clk_index_cnt > clk_name_cnt ? clk_index_cnt : clk_name_cnt;
+	if (clk_cnt < 1)
+		clk_cnt = 1;
+
+	multi = clk_cnt > 1;
+
+	if (multi) {
+		clk_index_max = clk_cnt - 1;
+		of_property_for_each_u32(node, "clock-indices", prop, p, clk_index) {
+			if (clk_index_max < clk_index)
+				clk_index_max = clk_index;
+		}
+
+		clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL);
+		clk_data->clks = kcalloc(clk_index_max + 1, sizeof(struct clk *), GFP_KERNEL);
+		clk_data->clk_num = clk_index_max + 1;
+	}
+
+	for (i = 0; i < clk_cnt; i++) {
+		ret = of_property_read_u32_index(node, "clock-indices", i, &clk_index);
+		if (ret)
+			clk_index = i;
+
+		ret = of_property_read_string_index(node, "clock-output-names", i, &clk_name);
+		if (ret) {
+			if (multi)
+				init.name = kasprintf(GFP_KERNEL, "%s.%d", node->name, clk_index);
+			else
+				init.name = kasprintf(GFP_KERNEL, "%s",	   node->name);
+		} else {
+			init.name = kasprintf(GFP_KERNEL, "%s.%s", node->name, clk_name);
+		}
+
+		init.ops = &baikal_clk_ops;
+		init.flags = CLK_IGNORE_UNUSED;
+		init.parent_names = NULL;
+		init.num_parents = 0;
+
+		cmu = kmalloc(sizeof(*cmu), GFP_KERNEL);
+		cmu->base = base + 0x10 * clk_index;
+		cmu->hw.init = &init;
+
+		clk = clk_register(NULL, &cmu->hw);
+		if (!IS_ERR(clk)) {
+			clk_register_clkdev(clk, init.name, NULL);
+			if (multi)
+				clk_data->clks[clk_index] = clk;
+		}
+	}
+
+	if (multi)
+		ret = of_clk_add_provider(pdev->dev.of_node, of_clk_src_onecell_get, clk_data);
+	else
+		ret = of_clk_add_provider(pdev->dev.of_node, of_clk_src_simple_get, clk);
+
+	return ret;
+}
+
+static int baikal_clk_remove(struct platform_device *pdev)
+{
+	of_clk_del_provider(pdev->dev.of_node);
+	return 0;
+}
+
+#ifdef CONFIG_ACPI
+const char *baikal_acpi_ref_clk_str[] = { "baikal_ref_clk" };
+
+static struct clk *baikal_acpi_ref_clk;
+
+struct baikal_acpi_clk_data {
+	struct clk *cmu_clk;
+	struct clk_lookup *cmu_clk_l;
+	struct clk **clk;
+	struct clk_lookup **clk_l;
+	unsigned int clk_num;
+};
+
+static int baikal_acpi_clk_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct acpi_device *ref_dev, *adev = to_acpi_device_node(pdev->dev.fwnode);
+	struct clk_init_data init, *init_ch;
+	struct baikal_clk *cmu, *cmu_ch;
+	struct baikal_acpi_clk_data *clk_data = NULL;
+	union acpi_object *package, *element;
+	acpi_status status;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	int size, i, index, ret = 0;
+	char *str, *str2;
+	const char *cmu_name;
+
+	cmu = devm_kzalloc(dev, sizeof(*cmu), GFP_KERNEL);
+	if (!cmu)
+		return -ENOMEM;
+
+	status = acpi_evaluate_object_typed(adev->handle, "PROP", NULL, &buffer, ACPI_TYPE_PACKAGE);
+	if (ACPI_FAILURE(status)) {
+		dev_err(dev, "failed to get PROP data\n");
+		return -ENODEV;
+	}
+
+	package = buffer.pointer;
+	if (package->package.count != 2) {
+		dev_err(dev, "invalid PROP data\n");
+		ret = -EINVAL;
+		goto ret;
+	}
+
+	element = &package->package.elements[0];
+	if (element->type != ACPI_TYPE_INTEGER) {
+		dev_err(dev, "failed to get CMU id\n");
+		ret = -EINVAL;
+		goto ret;
+	}
+
+	cmu->base = element->integer.value;
+
+	element = &package->package.elements[1];
+	if (element->type != ACPI_TYPE_STRING) {
+		dev_err(dev, "failed to get CMU clock name\n");
+		ret = -EINVAL;
+		goto ret;
+	}
+
+	str = devm_kzalloc(dev, element->string.length + 1, GFP_KERNEL);
+	if (!str) {
+		ret = -ENOMEM;
+		goto ret;
+	}
+
+	memcpy(str, element->string.pointer, element->string.length);
+	cmu_name = str;
+
+	acpi_os_free(buffer.pointer);
+	buffer.length = ACPI_ALLOCATE_BUFFER;
+	buffer.pointer = NULL;
+
+	init.parent_names = baikal_acpi_ref_clk_str;
+	init.num_parents = 1;
+	init.name = cmu_name;
+	init.ops = &baikal_clk_ops;
+	init.flags = CLK_IGNORE_UNUSED;
+
+	cmu->hw.init = &init;
+
+	clk_data = devm_kzalloc(dev, sizeof(*clk_data), GFP_KERNEL);
+	if (!clk_data)
+		return -ENOMEM;
+
+	clk_data->cmu_clk = clk_register(NULL, &cmu->hw);
+	if (IS_ERR(clk_data->cmu_clk)) {
+		dev_err(dev, "failed to register CMU clock\n");
+		return PTR_ERR(clk_data->cmu_clk);
+	}
+
+	clk_data->cmu_clk_l = clkdev_create(clk_data->cmu_clk, cmu_name, NULL);
+	if (!clk_data->cmu_clk_l) {
+		dev_err(dev, "failed to register CMU clock lookup\n");
+		clk_unregister(clk_data->cmu_clk);
+		return -ENOMEM;
+	}
+
+	platform_set_drvdata(pdev, clk_data);
+
+	status = acpi_evaluate_object_typed(adev->handle, "CLKS", NULL, &buffer, ACPI_TYPE_PACKAGE);
+	if (ACPI_FAILURE(status)) {
+		buffer.pointer = NULL;
+		goto ret;
+	}
+
+	package = buffer.pointer;
+	if (!package->package.count || package->package.count % 4) {
+		dev_err(dev, "invalid CLKS data\n");
+		ret = -EINVAL;
+		goto ret;
+	}
+
+	clk_data->clk_num = package->package.count >> 2;
+	clk_data->clk = devm_kzalloc(dev, clk_data->clk_num * sizeof(struct clk *), GFP_KERNEL);
+	if (!clk_data->clk) {
+		ret = -ENOMEM;
+		goto ret;
+	}
+
+	clk_data->clk_l = devm_kzalloc(dev, clk_data->clk_num * sizeof(struct clk_lookup *), GFP_KERNEL);
+	if (!clk_data->clk_l) {
+		ret = -ENOMEM;
+		goto ret;
+	}
+
+	init_ch = devm_kzalloc(dev, clk_data->clk_num * sizeof(struct clk_init_data), GFP_KERNEL);
+	if (!init_ch) {
+		ret = -ENOMEM;
+		goto ret;
+	}
+
+	cmu_ch = devm_kzalloc(dev, clk_data->clk_num * sizeof(struct baikal_clk), GFP_KERNEL);
+	if (!cmu_ch) {
+		ret = -ENOMEM;
+		goto ret;
+	}
+
+	for (i = 0; i < clk_data->clk_num; ++i) {
+		ref_dev = NULL;
+		size = 0;
+
+		element = &package->package.elements[4 * i];
+		if (element->type == ACPI_TYPE_LOCAL_REFERENCE && element->reference.handle) {
+			if (acpi_bus_get_device(element->reference.handle, &ref_dev))
+				ref_dev = NULL;
+		}
+
+		element = &package->package.elements[4 * i + 1];
+		if (element->type == ACPI_TYPE_STRING) {
+			if (ref_dev)
+				size = strlen(dev_name(&ref_dev->dev)) + 1;
+
+			str = devm_kzalloc(dev, size + element->string.length + 1, GFP_KERNEL);
+			if (str) {
+				if (ref_dev) {
+					memcpy(str, dev_name(&ref_dev->dev), size - 1);
+					str[size - 1] = '_';
+					memcpy(str + size, element->string.pointer, element->string.length);
+				} else {
+					memcpy(str, element->string.pointer, element->string.length);
+				}
+			}
+		} else {
+			dev_err(dev, "failed to process clock device name #%i\n", i);
+			continue;
+		}
+
+		element = &package->package.elements[4 * i + 2];
+		if (element->type == ACPI_TYPE_INTEGER) {
+			index = element->integer.value;
+		} else {
+			dev_err(dev, "failed to process clock device id #%i\n", i);
+			continue;
+		}
+
+		element = &package->package.elements[4 * i + 3];
+		if (element->type == ACPI_TYPE_STRING) {
+			str2 = devm_kzalloc(dev, element->string.length + 1, GFP_KERNEL);
+			if (str2)
+				memcpy(str2, element->string.pointer, element->string.length);
+		} else {
+			str2 = NULL;
+		}
+
+		init_ch[i].parent_names = &cmu_name;
+		init_ch[i].num_parents = 1;
+		init_ch[i].name = str;
+		init_ch[i].ops = &baikal_clk_ops;
+		init_ch[i].flags = CLK_IGNORE_UNUSED;
+
+		cmu_ch[i].base = cmu->base + 0x20 + 0x10 * index;
+		cmu_ch[i].hw.init = &init_ch[i];
+
+		clk_data->clk[i] = clk_register(ref_dev ? &ref_dev->dev : NULL, &cmu_ch[i].hw);
+		if (IS_ERR(clk_data->clk[i])) {
+			dev_err(dev, "failed to register CMU channel clock #%i\n", i);
+			clk_data->clk[i] = NULL;
+			continue;
+		}
+
+		if (ref_dev)
+			clk_data->clk_l[i] = clkdev_create(clk_data->clk[i], str2, "%s", dev_name(&ref_dev->dev));
+		else
+			clk_data->clk_l[i] = clkdev_create(clk_data->clk[i], str2, NULL);
+
+		if (!clk_data->clk_l[i]) {
+			dev_err(dev, "failed to register CMU channel clock lookup #%i\n", i);
+			clk_unregister(clk_data->clk[i]);
+			clk_data->clk[i] = NULL;
+			continue;
+		}
+	}
+
+	clk_data = NULL;
+
+ret:
+	if (buffer.pointer)
+		acpi_os_free(buffer.pointer);
+
+	if (clk_data) {
+		clk_disable_unprepare(clk_data->cmu_clk);
+		clkdev_drop(clk_data->cmu_clk_l);
+		clk_unregister(clk_data->cmu_clk);
+	}
+
+	return ret;
+}
+
+static int baikal_acpi_clk_remove(struct platform_device *pdev)
+{
+	struct baikal_acpi_clk_data *clk_data = platform_get_drvdata(pdev);
+	int i;
+
+	if (clk_data) {
+		clk_disable_unprepare(clk_data->cmu_clk);
+		clkdev_drop(clk_data->cmu_clk_l);
+		clk_unregister(clk_data->cmu_clk);
+
+		for (i = 0; i < clk_data->clk_num; ++i) {
+			if (clk_data->clk_l[i])
+				clkdev_drop(clk_data->clk_l[i]);
+			if (clk_data->clk[i])
+				clk_unregister(clk_data->clk[i]);
+		}
+	}
+
+	return 0;
+}
+
+static const struct acpi_device_id baikal_acpi_clk_device_ids[] = {
+	{ "BKLE0001" },
+	{ }
+};
+
+static struct platform_driver baikal_acpi_clk_driver = {
+	.probe		= baikal_acpi_clk_probe,
+	.remove		= baikal_acpi_clk_remove,
+	.driver		= {
+		.name	= "bs1000-cmu-acpi",
+		.acpi_match_table = ACPI_PTR(baikal_acpi_clk_device_ids)
+	}
+};
+
+static int __init baikal_acpi_clk_driver_init(void)
+{
+	if (!acpi_disabled) {
+		struct clk_lookup *baikal_acpi_ref_clk_lookup;
+
+		baikal_acpi_ref_clk = clk_register_fixed_rate(NULL, baikal_acpi_ref_clk_str[0], NULL, 0, 25000000);
+		if (IS_ERR(baikal_acpi_ref_clk)) {
+			pr_err("%s: failed to register reference clock\n", __func__);
+			return PTR_ERR(baikal_acpi_ref_clk);
+		}
+
+		baikal_acpi_ref_clk_lookup = clkdev_create(baikal_acpi_ref_clk, NULL, "%s", baikal_acpi_ref_clk_str[0]);
+		if (!baikal_acpi_ref_clk_lookup) {
+			clk_unregister_fixed_rate(baikal_acpi_ref_clk);
+			pr_err("%s: failed to register reference clock lookup\n", __func__);
+			return -ENOMEM;
+		}
+
+		clk_prepare_enable(baikal_acpi_ref_clk);
+
+		return platform_driver_register(&baikal_acpi_clk_driver);
+	}
+
+	return 0;
+}
+
+device_initcall(baikal_acpi_clk_driver_init);
+#endif
+
+static const struct of_device_id baikal_clk_of_match[] = {
+	{ .compatible = "baikal,bs1000-cmu" },
+	{ /* sentinel */ }
+};
+
+static struct platform_driver bs1000_cmu_driver = {
+	.probe	= baikal_clk_probe,
+	.remove	= baikal_clk_remove,
+	.driver	= {
+		.name = "bs1000-cmu",
+		.of_match_table = baikal_clk_of_match
+	}
+};
+module_platform_driver(bs1000_cmu_driver);
+
+MODULE_DESCRIPTION("Baikal BE-S1000 clock driver");
+MODULE_AUTHOR("Ekaterina Skachko <ekaterina.skachko@baikalelectronics.ru>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:bs1000-cmu");
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 5d28553b69f5b..fe4dc79af72b8 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -32,6 +32,9 @@ static const struct of_device_id whitelist[] __initconst = {
 	{ .compatible = "arm,integrator-ap", },
 	{ .compatible = "arm,integrator-cp", },
 
+	{ .compatible = "baikal,bm1000", },
+	{ .compatible = "baikal,bs1000", },
+
 	{ .compatible = "hisilicon,hi3660", },
 
 	{ .compatible = "fsl,imx27", },
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 417dad6355268..92fa2315f35ef 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -524,4 +524,11 @@ config EDAC_BLUEFIELD
 	  Support for error detection and correction on the
 	  Mellanox BlueField SoCs.
 
+config EDAC_BAIKAL
+	tristate "Baikal-M SoC Memory Controller"
+	depends on ARCH_BAIKAL
+	help
+	  Support for error detection and correction on the
+	  Baikal Electronics SoCs.
+
 endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index d77200c9680bc..be195b779caec 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -87,3 +87,4 @@ obj-$(CONFIG_EDAC_TI)			+= ti_edac.o
 obj-$(CONFIG_EDAC_QCOM)			+= qcom_edac.o
 obj-$(CONFIG_EDAC_ASPEED)		+= aspeed_edac.o
 obj-$(CONFIG_EDAC_BLUEFIELD)		+= bluefield_edac.o
+obj-$(CONFIG_EDAC_BAIKAL)		+= baikal_mc_edac.o
diff --git a/drivers/edac/baikal_mc_edac.c b/drivers/edac/baikal_mc_edac.c
new file mode 100644
index 0000000000000..32fabe8ceef6e
--- /dev/null
+++ b/drivers/edac/baikal_mc_edac.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Baikal-M memory controller edac kernel module.
+ *
+ * Copyright (C) 2021-2022 Baikal Electronics, JSC
+ */
+
+#include <linux/edac.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include "edac_mc.h"
+
+#define BAIKAL_EDAC_MSG_SIZE	80
+
+#define DDRC_ECCCFG0		0x70 /* ECC Configuration Register */
+#define DDRC_ECCCFG1		0x74 /* ECC Configuration Register */
+#define DDRC_ECCSTAT		0x78 /* ECC Status Register */
+#define DDRC_ECCCLR		0x7c /* ECC Clear Register */
+#define DDRC_ECCERRCNT		0x80 /* ECC Error Counter Register */
+#define DDRC_ECCCADDR0		0x84 /* ECC Corrected Error Address Register 0 */
+#define DDRC_ECCCADDR1		0x88 /* ECC Corrected Error Address Register 1 */
+#define DDRC_ECCCSYN0		0x8c /* ECC Corrected Syndrome Register 0 */
+#define DDRC_ECCCSYN1		0x90 /* ECC Corrected Syndrome Register 1 */
+#define DDRC_ECCCSYN2		0x94 /* ECC Corrected Syndrome Register 2 */
+#define DDRC_ECCBITMASK0	0x98 /* ECC Corrected Data Bit Mask Register 0 */
+#define DDRC_ECCBITMASK1	0x9c /* ECC Corrected Data Bit Mask Register 1 */
+#define DDRC_ECCBITMASK2	0xa0 /* ECC Corrected Data Bit Mask Register 2 */
+#define DDRC_ECCUADDR0		0xa4 /* ECC Uncorrected Error Address Register 0 */
+#define DDRC_ECCUADDR1		0xa8 /* ECC Uncorrected Error Address Register 1 */
+#define DDRC_ECCUSYN0		0xac /* ECC Uncorrected Syndrome Register 0 */
+#define DDRC_ECCUSYN1		0xb0 /* ECC Uncorrected Syndrome Register 1 */
+#define DDRC_ECCUSYN2		0xb4 /* ECC Uncorrected Syndrome Register 2 */
+#define DDRC_ECCPOISONADDR0	0xb8 /* ECC Data Poisoning Address Register 0 */
+#define DDRC_ECCPOISONADDR1	0xbc /* ECC Data Poisoning Address Register 1 */
+
+#define ECCCTL_ENABLE_INTR	0x300
+#define ECCCTL_CLEAR_CERR	(1 << 0)
+#define ECCCTL_CLEAR_UERR	(1 << 1)
+
+#define ECCCNT_CERRS_MASK	(0xffff << 0)
+#define ECCCNT_UERRS_SHIFT	16
+
+#define ECCADDR_RANK_SHIFT	24
+#define ECCADDR_BG_SHIFT	24
+#define ECCADDR_BANK_SHIFT	16
+
+#define ECCADDR_RANK_MASK	(0x3 << ECCADDR_RANK_SHIFT)
+#define ECCADDR_ROW_MASK	(0x3ffff)
+#define ECCADDR_BG_MASK		(0x3 << ECCADDR_BG_SHIFT)
+#define ECCADDR_BANK_MASK	(0x7 << ECCADDR_BANK_SHIFT)
+#define ECCADDR_COL_MASK	(0xfff)
+
+struct baikal_edac_priv {
+	void __iomem *baseaddr;
+	int irq_cer;
+	int irq_uer;
+};
+
+static int ecc_mask_bitnum(unsigned mask)
+{
+	int bitnum = 0;
+
+	while (mask) {
+		mask >>= 1;
+		bitnum++;
+	}
+
+	return bitnum;
+}
+
+static irqreturn_t baikal_mc_err_handler(int irq, void *dev_id)
+{
+	u32 regaddr0, regaddr1;
+	char msg[BAIKAL_EDAC_MSG_SIZE];
+	struct mem_ctl_info *mci = dev_id;
+	const struct baikal_edac_priv *priv = mci->pvt_info;
+
+	if (irq == priv->irq_cer) {
+		regaddr0 = readl(priv->baseaddr + DDRC_ECCCADDR0);
+		regaddr1 = readl(priv->baseaddr + DDRC_ECCCADDR1);
+
+		snprintf(msg, BAIKAL_EDAC_MSG_SIZE, "catched at "
+			"Rank: %d, BankGroup: %d, Bank: %d, Row: %d, Col: %d\n",
+			(regaddr0 & ECCADDR_RANK_MASK) >> ECCADDR_RANK_SHIFT,
+			(regaddr1 & ECCADDR_BG_MASK) >> ECCADDR_BG_SHIFT,
+			(regaddr1 & ECCADDR_BANK_MASK) >> ECCADDR_BANK_SHIFT,
+			ecc_mask_bitnum(regaddr0 & ECCADDR_ROW_MASK),
+			ecc_mask_bitnum(regaddr1 & ECCADDR_COL_MASK));
+
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+				readl(priv->baseaddr + DDRC_ECCERRCNT) & ECCCNT_CERRS_MASK,
+				0, 0, (u64)readl(priv->baseaddr + DDRC_ECCCSYN0) << 32 |
+				readl(priv->baseaddr + DDRC_ECCCSYN1), 0, -1, -1, msg, "");
+
+		writel(ECCCTL_ENABLE_INTR | ECCCTL_CLEAR_CERR,
+				priv->baseaddr + DDRC_ECCCLR);
+
+		return IRQ_HANDLED;
+	}
+
+	if (irq == priv->irq_uer) {
+		regaddr0 = readl(priv->baseaddr + DDRC_ECCUADDR0);
+		regaddr1 = readl(priv->baseaddr + DDRC_ECCUADDR1);
+
+		snprintf(msg, BAIKAL_EDAC_MSG_SIZE, "catched at "
+			"Rank: %d, BankGroup: %d, Bank: %d, Row: %d, Col: %d\n",
+			(regaddr0 & ECCADDR_RANK_MASK) >> ECCADDR_RANK_SHIFT,
+			(regaddr1 & ECCADDR_BG_MASK) >> ECCADDR_BG_SHIFT,
+			(regaddr1 & ECCADDR_BANK_MASK) >> ECCADDR_BANK_SHIFT,
+			ecc_mask_bitnum(regaddr0 & ECCADDR_ROW_MASK),
+			ecc_mask_bitnum(regaddr1 & ECCADDR_COL_MASK));
+
+		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+				readl(priv->baseaddr + DDRC_ECCERRCNT) >> ECCCNT_UERRS_SHIFT,
+				0, 0, (u64)readl(priv->baseaddr + DDRC_ECCUSYN0) << 32 |
+				readl(priv->baseaddr + DDRC_ECCUSYN1), 0, -1, -1, msg, "");
+
+		writel(ECCCTL_ENABLE_INTR | ECCCTL_CLEAR_UERR,
+				priv->baseaddr + DDRC_ECCCLR);
+
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static bool baikal_get_ecc_state(void __iomem *base)
+{
+	return (readl(base + DDRC_ECCCFG0) & 0x7) == 0x4;
+}
+
+static const struct of_device_id baikal_edac_match[] = {
+	{
+		.compatible = "baikal,bm1000-edac-mc",
+	},
+	{
+		/* null entry */
+	}
+};
+MODULE_DEVICE_TABLE(of, baikal_edac_match);
+
+static int baikal_mc_probe(struct platform_device *pdev)
+{
+	struct edac_mc_layer layers;
+	struct baikal_edac_priv *priv;
+	struct mem_ctl_info *mci;
+	struct dimm_info *dimm;
+	void __iomem *baseaddr;
+	int ret;
+
+	baseaddr = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(baseaddr))
+		return PTR_ERR(baseaddr);
+
+	if (!baikal_get_ecc_state(baseaddr)) {
+		edac_printk(KERN_INFO, EDAC_MC, "%s: ECC not enabled\n",
+				pdev->name);
+		return -ENXIO;
+	}
+
+	/* oversimplified layout */
+	layers.type = EDAC_MC_LAYER_ALL_MEM;
+	layers.size = 1;
+	layers.is_virt_csrow = false;
+
+	mci = edac_mc_alloc(0, 1, &layers,
+			    sizeof(struct baikal_edac_priv));
+	if (!mci) {
+		edac_printk(KERN_ERR, EDAC_MC,
+			    "Failed memory allocation for mc instance\n");
+		return -ENOMEM;
+	}
+
+	priv = mci->pvt_info;
+	priv->baseaddr = baseaddr;
+
+	platform_set_drvdata(pdev, mci);
+
+	mci->mtype_cap = MEM_FLAG_DDR4;
+	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+	mci->mod_name = pdev->dev.driver->name;
+	mci->dev_name = dev_name(&pdev->dev);
+	mci->ctl_name = "baikal_edac_ctl";
+	mci->scrub_mode = SCRUB_HW_TUNABLE;
+	mci->mc_idx = pdev->name[0] != 'e' ? 1 : 0;
+	mci->pdev = &pdev->dev;
+
+	dimm = *mci->dimms;
+	dimm->mci = mci;
+	dimm->grain = 1;
+
+	priv->irq_cer = platform_get_irq(pdev, 1);
+	if (priv->irq_cer < 0) {
+		edac_printk(KERN_ERR, EDAC_MC,
+			    "No IRQ in DT (%d)\n", priv->irq_cer);
+		ret = priv->irq_cer;
+		goto free_mc;
+	}
+	ret = devm_request_irq(&pdev->dev, priv->irq_cer, baikal_mc_err_handler,
+				0, "baikal_mc_cerr", mci);
+	if (ret) {
+		edac_printk(KERN_ERR, EDAC_MC,
+			    "Unable to request irq %d\n", priv->irq_cer);
+		goto free_mc;
+	}
+
+	priv->irq_uer = platform_get_irq(pdev, 2);
+	if (priv->irq_uer < 0) {
+		edac_printk(KERN_ERR, EDAC_MC,
+				"No IRQ in DT (%d)\n", priv->irq_uer);
+		ret = priv->irq_uer;
+		goto free_irq;
+	}
+	ret = devm_request_irq(&pdev->dev, priv->irq_uer, baikal_mc_err_handler,
+				0, "baikal_mc_uerr", mci);
+	if (ret) {
+		edac_printk(KERN_ERR, EDAC_MC,
+				"Unable to request irq %d\n", priv->irq_uer);
+		goto free_irq;
+	}
+
+	ret = edac_mc_add_mc_with_groups(mci, NULL);
+	if (ret) {
+		edac_printk(KERN_ERR, EDAC_MC,
+				"Failed to register with EDAC core\n");
+		goto free_irq;
+	}
+
+	return 0;
+
+free_irq:
+	devm_free_irq(&pdev->dev, priv->irq_cer, (void *)mci);
+	devm_free_irq(&pdev->dev, priv->irq_uer, (void *)mci);
+
+free_mc:
+	edac_mc_del_mc(&pdev->dev);
+	edac_mc_free(mci);
+
+	return ret;
+}
+
+static int baikal_mc_remove(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+	struct baikal_edac_priv *priv = mci->pvt_info;
+
+	devm_free_irq(&pdev->dev, priv->irq_cer, (void *)mci);
+	devm_free_irq(&pdev->dev, priv->irq_uer, (void *)mci);
+
+	edac_mc_del_mc(&pdev->dev);
+	edac_mc_free(mci);
+
+	return 0;
+}
+
+static struct platform_driver baikal_mc_driver = {
+	.driver = {
+		   .name = "baikal-edac",
+		   .of_match_table = baikal_edac_match,
+		   },
+	.probe = baikal_mc_probe,
+	.remove = baikal_mc_remove,
+};
+module_platform_driver(baikal_mc_driver);
+
+MODULE_VERSION("1.0");
+MODULE_AUTHOR("Ivan Kapaev <Ivan.Kapaev@baikalelectronics.ru>");
+MODULE_DESCRIPTION("DDR ECC driver for Baikal SoCs");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
index e2ac5fa5531b9..8318276ae8434 100644
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -75,6 +75,9 @@ static bool __init efi_virtmap_init(void)
 			pr_warn("  EFI remap %pa: failed to create mapping (%d)\n",
 				&phys, ret);
 			return false;
+		} else {
+			pr_info("  EFI remap %pa => %llx\n",
+				&phys, (unsigned long long)md->virt_addr);
 		}
 		/*
 		 * If this entry covers the address of the UEFI system table,
diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile
index f17d01f076c79..93020d0f994a1 100644
--- a/drivers/gpu/Makefile
+++ b/drivers/gpu/Makefile
@@ -5,3 +5,4 @@
 obj-$(CONFIG_TEGRA_HOST1X)	+= host1x/
 obj-y			+= drm/ vga/
 obj-$(CONFIG_IMX_IPUV3_CORE)	+= ipu-v3/
+obj-y			+= arm/
diff --git a/drivers/gpu/arm/Kbuild b/drivers/gpu/arm/Kbuild
new file mode 100644
index 0000000000000..1a6fa3c9a3f60
--- /dev/null
+++ b/drivers/gpu/arm/Kbuild
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+obj-$(CONFIG_MALI_MIDGARD) += midgard/
diff --git a/drivers/gpu/arm/Kconfig b/drivers/gpu/arm/Kconfig
new file mode 100644
index 0000000000000..693b86f8a1444
--- /dev/null
+++ b/drivers/gpu/arm/Kconfig
@@ -0,0 +1,25 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+menu "ARM GPU Configuration"
+source "drivers/gpu/arm/midgard/Kconfig"
+endmenu
diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild
new file mode 100644
index 0000000000000..454756f5dcda9
--- /dev/null
+++ b/drivers/gpu/arm/midgard/Kbuild
@@ -0,0 +1,170 @@
+#
+# (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+# Driver version string which is returned to userspace via an ioctl
+MALI_RELEASE_NAME ?= "r26p0-01rel0"
+
+# Paths required for build
+KBASE_PATH = $(src)
+KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
+UMP_PATH = $(src)/../../../base
+
+# Set up defaults if not defined by build system
+MALI_CUSTOMER_RELEASE ?= 1
+MALI_USE_CSF ?= 0
+MALI_UNIT_TEST ?= 0
+MALI_KERNEL_TEST_API ?= 0
+MALI_MOCK_TEST ?= 0
+MALI_COVERAGE ?= 0
+CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
+
+# Set up our defines, which will be passed to gcc
+DEFINES = \
+	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	-DMALI_USE_CSF=$(MALI_USE_CSF) \
+	-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
+	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+	-DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
+	-DMALI_COVERAGE=$(MALI_COVERAGE) \
+	-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\"
+
+ifeq ($(KBUILD_EXTMOD),)
+# in-tree
+DEFINES +=-DMALI_KBASE_PLATFORM_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME)
+else
+# out-of-tree
+DEFINES +=-DMALI_KBASE_PLATFORM_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME)
+endif
+
+DEFINES += -I$(srctree)/drivers/staging/android
+
+DEFINES += -DMALI_KBASE_BUILD
+
+# Use our defines when compiling
+ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+
+SRC := \
+	mali_kbase_device.c \
+	mali_kbase_cache_policy.c \
+	mali_kbase_mem.c \
+	mali_kbase_mmu.c \
+	mali_kbase_ctx_sched.c \
+	mali_kbase_jd.c \
+	mali_kbase_jd_debugfs.c \
+	mali_kbase_jm.c \
+	mali_kbase_gpuprops.c \
+	mali_kbase_js.c \
+	mali_kbase_js_ctx_attr.c \
+	mali_kbase_event.c \
+	mali_kbase_context.c \
+	mali_kbase_pm.c \
+	mali_kbase_config.c \
+	mali_kbase_vinstr.c \
+	mali_kbase_softjobs.c \
+	mali_kbase_10969_workaround.c \
+	mali_kbase_hw.c \
+	mali_kbase_utility.c \
+	mali_kbase_debug.c \
+	mali_kbase_gpu_memory_debugfs.c \
+	mali_kbase_mem_linux.c \
+	mali_kbase_core_linux.c \
+	mali_kbase_replay.c \
+	mali_kbase_mem_profile_debugfs.c \
+	mali_kbase_mmu_mode_lpae.c \
+	mali_kbase_mmu_mode_aarch64.c \
+	mali_kbase_disjoint_events.c \
+	mali_kbase_gator_api.c \
+	mali_kbase_debug_mem_view.c \
+	mali_kbase_debug_job_fault.c \
+	mali_kbase_smc.c \
+	mali_kbase_mem_pool.c \
+	mali_kbase_mem_pool_debugfs.c \
+	mali_kbase_tlstream.c \
+	mali_kbase_strings.c \
+	mali_kbase_as_fault_debugfs.c \
+	mali_kbase_regs_history_debugfs.c \
+	thirdparty/mali_kbase_mmap.c
+
+
+ifeq ($(CONFIG_MALI_JOB_DUMP),y)
+	SRC += mali_kbase_gwt.c
+endif
+
+ifeq ($(MALI_UNIT_TEST),1)
+	SRC += mali_kbase_tlstream_test.c
+endif
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+	SRC += mali_kbase_regs_dump_debugfs.c
+endif
+
+
+ccflags-y += -I$(KBASE_PATH)
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o
+
+# Tell the Linux build system to enable building of our .c files
+mali_kbase-y := $(SRC:.c=.o)
+
+# Kconfig passes in the name with quotes for in-tree builds - remove them.
+platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_NAME))
+MALI_PLATFORM_DIR := platform/$(platform_name)
+ccflags-y += -I$(src)/$(MALI_PLATFORM_DIR)
+include $(src)/$(MALI_PLATFORM_DIR)/Kbuild
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+  ifeq ($(CONFIG_DEVFREQ_THERMAL),y)
+    include $(src)/ipa/Kbuild
+  endif
+endif
+
+ifeq ($(MALI_USE_CSF),1)
+	include $(src)/csf/Kbuild
+endif
+
+mali_kbase-$(CONFIG_MALI_DMA_FENCE) += \
+	mali_kbase_dma_fence.o \
+	mali_kbase_fence.o
+mali_kbase-$(CONFIG_SYNC) += \
+	mali_kbase_sync_android.o \
+	mali_kbase_sync_common.o
+mali_kbase-$(CONFIG_SYNC_FILE) += \
+	mali_kbase_sync_file.o \
+	mali_kbase_sync_common.o \
+	mali_kbase_fence.o
+
+ifeq ($(MALI_MOCK_TEST),1)
+# Test functionality
+mali_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o
+endif
+
+include  $(src)/backend/gpu/Kbuild
+mali_kbase-y += $(BACKEND:.c=.o)
+
+
+ccflags-y += -I$(src)/backend/gpu
+subdir-ccflags-y += -I$(src)/backend/gpu
+
+# For kutf and mali_kutf_irq_latency_test
+obj-$(CONFIG_MALI_KUTF) += tests/
diff --git a/drivers/gpu/arm/midgard/Kconfig b/drivers/gpu/arm/midgard/Kconfig
new file mode 100644
index 0000000000000..af2a5aa576497
--- /dev/null
+++ b/drivers/gpu/arm/midgard/Kconfig
@@ -0,0 +1,213 @@
+#
+# (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+menuconfig MALI_MIDGARD
+	tristate "Mali Midgard series support"
+	select GPU_TRACEPOINTS if ANDROID
+	default n
+	help
+	  Enable this option to build support for a ARM Mali Midgard GPU.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate a single module, called mali_kbase.
+
+config MALI_GATOR_SUPPORT
+	bool "Streamline support via Gator"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
+	  You will need the Gator device driver already loaded before loading this driver when enabling
+	  Streamline debug support.
+	  This is a legacy interface required by older versions of Streamline.
+
+config MALI_MIDGARD_DVFS
+	bool "Enable legacy DVFS"
+	depends on MALI_MIDGARD && !MALI_DEVFREQ
+	default n
+	help
+	  Choose this option to enable legacy DVFS in the Mali Midgard DDK.
+
+config MALI_MIDGARD_ENABLE_TRACE
+	bool "Enable kbase tracing"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Enables tracing in kbase.  Trace log available through
+	  the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
+
+config MALI_DEVFREQ
+	bool "devfreq support for Mali"
+	depends on MALI_MIDGARD && PM_DEVFREQ
+	default y
+	help
+	  Support devfreq for Mali.
+
+	  Using the devfreq framework and, by default, the simpleondemand
+	  governor, the frequency of Mali will be dynamically selected from the
+	  available OPPs.
+
+config MALI_DMA_FENCE
+	bool "DMA_BUF fence support for Mali"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Support DMA_BUF fences for Mali.
+
+	  This option should only be enabled if the Linux Kernel has built in
+	  support for DMA_BUF fences.
+
+config MALI_PLATFORM_NAME
+	depends on MALI_MIDGARD
+	string "Platform name"
+	default "devicetree"
+	help
+	  Enter the name of the desired platform configuration directory to
+	  include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must
+	  exist.
+
+# MALI_EXPERT configuration options
+
+menuconfig MALI_EXPERT
+	depends on MALI_MIDGARD
+	bool "Enable Expert Settings"
+	default n
+	help
+	  Enabling this option and modifying the default settings may produce a driver with performance or
+	  other limitations.
+
+config MALI_CORESTACK
+	bool "Support controlling power to the GPU core stack"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enabling this feature on supported GPUs will let the driver powering
+	  on/off the GPU core stack independently without involving the Power
+	  Domain Controller. This should only be enabled on platforms which
+	  integration of the PDC to the Mali GPU is known to be problematic.
+	  This feature is currently only supported on t-Six and t-HEx GPUs.
+
+	  If unsure, say N.
+
+config MALI_DEBUG
+	bool "Debug build"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option for increased checking and reporting of errors.
+
+config MALI_FENCE_DEBUG
+	bool "Debug sync fence usage"
+	depends on MALI_MIDGARD && MALI_EXPERT && (SYNC || SYNC_FILE)
+	default y if MALI_DEBUG
+	help
+	  Select this option to enable additional checking and reporting on the
+	  use of sync fences in the Mali driver.
+
+	  This will add a 3s timeout to all sync fence waits in the Mali
+	  driver, so that when work for Mali has been waiting on a sync fence
+	  for a long time a debug message will be printed, detailing what fence
+	  is causing the block, and which dependent Mali atoms are blocked as a
+	  result of this.
+
+	  The timeout can be changed at runtime through the js_soft_timeout
+	  device attribute, where the timeout is specified in milliseconds.
+
+config MALI_NO_MALI
+	bool "No Mali"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  This can be used to test the driver in a simulated environment
+	  whereby the hardware is not physically present. If the hardware is physically
+	  present it will not be used. This can be used to test the majority of the
+	  driver without needing actual hardware or for software benchmarking.
+	  All calls to the simulated hardware will complete immediately as if the hardware
+	  completed the task.
+
+config MALI_ERROR_INJECT
+	bool "Error injection"
+	depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
+	default n
+	help
+	  Enables insertion of errors to test module failure and recovery mechanisms.
+
+config MALI_SYSTEM_TRACE
+	bool "Enable system event tracing support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system trace events for each
+	  kbase event. This is typically used for debugging but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+config MALI_2MB_ALLOC
+	bool "Attempt to allocate 2MB pages"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Rather than allocating all GPU memory page-by-page, attempt to
+	  allocate 2MB pages from the kernel. This reduces TLB pressure and
+	  helps to prevent memory fragmentation.
+
+	  If in doubt, say N
+
+config MALI_PWRSOFT_765
+	bool "PWRSOFT-765 ticket"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged
+	  in kernel v4.10, however if backported into the kernel then this
+	  option must be manually selected.
+
+	  If using kernel >= v4.10 then say N, otherwise if devfreq cooling
+	  changes have been backported say Y to avoid compilation errors.
+
+# Instrumentation options.
+
+config MALI_JOB_DUMP
+	bool "Enable system level support needed for job dumping"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system level support needed for
+	  job dumping. This is typically used for instrumentation but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+config MALI_PRFCNT_SET_SECONDARY
+	bool "Use secondary set of performance counters"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Select this option to use secondary set of performance counters. Kernel
+	  features that depend on an access to the primary set of counters may
+	  become unavailable. Enabling this option will prevent power management
+	  from working optimally and may cause instrumentation tools to return
+	  bogus results.
+
+	  If unsure, say N.
+
+source "drivers/gpu/arm/midgard/platform/Kconfig"
+source "drivers/gpu/arm/midgard/tests/Kconfig"
diff --git a/drivers/gpu/arm/midgard/Makefile b/drivers/gpu/arm/midgard/Makefile
new file mode 100644
index 0000000000000..13af9f473890c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/Makefile
@@ -0,0 +1,42 @@
+#
+# (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
+KBASE_PATH_RELATIVE = $(CURDIR)
+
+ifeq ($(MALI_UNIT_TEST), 1)
+	EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+
+ifeq ($(CONFIG_MALI_FPGA_BUS_LOGGER),y)
+#Add bus logger symbols
+EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
+endif
+
+# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
+all:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+clean:
+	$(MAKE) -C $(KDIR) M=$(CURDIR) clean
diff --git a/drivers/gpu/arm/midgard/Makefile.kbase b/drivers/gpu/arm/midgard/Makefile.kbase
new file mode 100644
index 0000000000000..d7898cb3d1a53
--- /dev/null
+++ b/drivers/gpu/arm/midgard/Makefile.kbase
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)
+
diff --git a/drivers/gpu/arm/midgard/Mconfig b/drivers/gpu/arm/midgard/Mconfig
new file mode 100644
index 0000000000000..583dec320d8bc
--- /dev/null
+++ b/drivers/gpu/arm/midgard/Mconfig
@@ -0,0 +1,197 @@
+#
+# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+#
+
+
+menuconfig MALI_MIDGARD
+	bool "Mali Midgard series support"
+	default y
+	help
+	  Enable this option to build support for a ARM Mali Midgard GPU.
+
+	  To compile this driver as a module, choose M here:
+	  this will generate a single module, called mali_kbase.
+
+config MALI_GATOR_SUPPORT
+	bool "Streamline support via Gator"
+	depends on MALI_MIDGARD && !BACKEND_USER
+	default y if INSTRUMENTATION_STREAMLINE_OLD
+	default n
+	help
+	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
+	  You will need the Gator device driver already loaded before loading this driver when enabling
+	  Streamline debug support.
+	  This is a legacy interface required by older versions of Streamline.
+
+config MALI_MIDGARD_DVFS
+	bool "Enable legacy DVFS"
+	depends on MALI_MIDGARD && !MALI_DEVFREQ
+	default n
+	help
+	  Choose this option to enable legacy DVFS in the Mali Midgard DDK.
+
+config MALI_MIDGARD_ENABLE_TRACE
+	bool "Enable kbase tracing"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Enables tracing in kbase.  Trace log available through
+	  the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
+
+config MALI_DEVFREQ
+	bool "devfreq support for Mali"
+	depends on MALI_MIDGARD
+	default y if PLATFORM_JUNO
+	default y if PLATFORM_CUSTOM
+	help
+	  Support devfreq for Mali.
+
+	  Using the devfreq framework and, by default, the simpleondemand
+	  governor, the frequency of Mali will be dynamically selected from the
+	  available OPPs.
+
+config MALI_DMA_FENCE
+	bool "DMA_BUF fence support for Mali"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  Support DMA_BUF fences for Mali.
+
+	  This option should only be enabled if the Linux Kernel has built in
+	  support for DMA_BUF fences.
+
+config MALI_PLATFORM_NAME
+	depends on MALI_MIDGARD
+	string "Platform name"
+	default "arndale" if PLATFORM_ARNDALE
+	default "arndale_octa" if PLATFORM_ARNDALE_OCTA
+	default "rk" if PLATFORM_FIREFLY
+	default "hisilicon" if PLATFORM_HIKEY960
+	default "vexpress" if PLATFORM_VEXPRESS
+	default "devicetree"
+	help
+	  Enter the name of the desired platform configuration directory to
+	  include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must
+	  exist.
+
+	  When PLATFORM_CUSTOM is set, this needs to be set manually to
+	  pick up the desired platform files.
+
+config MALI_MOCK_TEST
+	bool
+	depends on MALI_MIDGARD && !RELEASE
+	default y
+
+# MALI_EXPERT configuration options
+
+menuconfig MALI_EXPERT
+	depends on MALI_MIDGARD
+	bool "Enable Expert Settings"
+	default y
+	help
+	  Enabling this option and modifying the default settings may produce a driver with performance or
+	  other limitations.
+
+config MALI_CORESTACK
+	bool "Support controlling power to the GPU core stack"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enabling this feature on supported GPUs will let the driver powering
+	  on/off the GPU core stack independently without involving the Power
+	  Domain Controller. This should only be enabled on platforms which
+	  integration of the PDC to the Mali GPU is known to be problematic.
+	  This feature is currently only supported on t-Six and t-HEx GPUs.
+
+	  If unsure, say N.
+
+config MALI_DEBUG
+	bool "Debug build"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default y if DEBUG
+	default n
+	help
+	  Select this option for increased checking and reporting of errors.
+
+config MALI_FENCE_DEBUG
+	bool "Debug sync fence usage"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default y if MALI_DEBUG
+	help
+	  Select this option to enable additional checking and reporting on the
+	  use of sync fences in the Mali driver.
+
+	  This will add a 3s timeout to all sync fence waits in the Mali
+	  driver, so that when work for Mali has been waiting on a sync fence
+	  for a long time a debug message will be printed, detailing what fence
+	  is causing the block, and which dependent Mali atoms are blocked as a
+	  result of this.
+
+	  The timeout can be changed at runtime through the js_soft_timeout
+	  device attribute, where the timeout is specified in milliseconds.
+
+config MALI_ERROR_INJECT
+	bool "Error injection"
+	depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI
+	default n
+	help
+	  Enables insertion of errors to test module failure and recovery mechanisms.
+
+config MALI_ERROR_INJECT_RANDOM
+	bool "Random error injection"
+	depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI && MALI_ERROR_INJECT
+	default n
+	help
+	  Injected errors are random, rather than user-driven.
+
+config MALI_SYSTEM_TRACE
+	bool "Enable system event tracing support"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Choose this option to enable system trace events for each
+	  kbase event.	This is typically used for debugging but has
+	  minimal overhead when not in use. Enable only if you know what
+	  you are doing.
+
+config MALI_2MB_ALLOC
+	bool "Attempt to allocate 2MB pages"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Rather than allocating all GPU memory page-by-page, attempt to
+	  allocate 2MB pages from the kernel. This reduces TLB pressure and
+	  helps to prevent memory fragmentation.
+
+	  If in doubt, say N
+
+config MALI_FPGA_BUS_LOGGER
+	bool "Enable bus log integration"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+
+config MALI_PWRSOFT_765
+	bool "PWRSOFT-765 ticket"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  PWRSOFT-765 fixes devfreq cooling devices issues. However, they are
+	  not merged in mainline kernel yet. So this define helps to guard those
+	  parts of the code.
+
+# Instrumentation options.
+
+# config MALI_JOB_DUMP exists in the Kernel Kconfig but is configured using CINSTR_JOB_DUMP in Mconfig.
+# config MALI_PRFCNT_SET_SECONDARY exists in the Kernel Kconfig but is configured using CINSTR_SECONDARY_HWC in Mconfig.
+
+source "kernel/drivers/gpu/arm/midgard/tests/Mconfig"
diff --git a/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/drivers/gpu/arm/midgard/backend/gpu/Kbuild
new file mode 100644
index 0000000000000..dcd8ca4ce4f43
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/Kbuild
@@ -0,0 +1,62 @@
+#
+# (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+BACKEND += \
+	backend/gpu/mali_kbase_cache_policy_backend.c \
+	backend/gpu/mali_kbase_device_hw.c \
+	backend/gpu/mali_kbase_gpu.c \
+	backend/gpu/mali_kbase_gpuprops_backend.c \
+	backend/gpu/mali_kbase_debug_job_fault_backend.c \
+	backend/gpu/mali_kbase_irq_linux.c \
+	backend/gpu/mali_kbase_instr_backend.c \
+	backend/gpu/mali_kbase_jm_as.c \
+	backend/gpu/mali_kbase_jm_hw.c \
+	backend/gpu/mali_kbase_jm_rb.c \
+	backend/gpu/mali_kbase_js_backend.c \
+	backend/gpu/mali_kbase_mmu_hw_direct.c \
+	backend/gpu/mali_kbase_pm_backend.c \
+	backend/gpu/mali_kbase_pm_driver.c \
+	backend/gpu/mali_kbase_pm_metrics.c \
+	backend/gpu/mali_kbase_pm_ca.c \
+	backend/gpu/mali_kbase_pm_always_on.c \
+	backend/gpu/mali_kbase_pm_coarse_demand.c \
+	backend/gpu/mali_kbase_pm_demand.c \
+	backend/gpu/mali_kbase_pm_policy.c \
+	backend/gpu/mali_kbase_time.c
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+BACKEND += \
+	backend/gpu/mali_kbase_pm_demand_always_powered.c \
+	backend/gpu/mali_kbase_pm_fast_start.c
+endif
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+BACKEND += \
+	backend/gpu/mali_kbase_devfreq.c
+endif
+
+ifeq ($(CONFIG_MALI_NO_MALI),y)
+	# Dummy model
+	BACKEND += backend/gpu/mali_kbase_model_dummy.c
+	BACKEND += backend/gpu/mali_kbase_model_linux.c
+	# HW error simulation
+	BACKEND += backend/gpu/mali_kbase_model_error_generator.c
+endif
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
new file mode 100644
index 0000000000000..196a776f7f9df
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
@@ -0,0 +1,34 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Backend specific configuration
+ */
+
+#ifndef _KBASE_BACKEND_CONFIG_H_
+#define _KBASE_BACKEND_CONFIG_H_
+
+/* Enable GPU reset API */
+#define KBASE_GPU_RESET_EN 1
+
+#endif /* _KBASE_BACKEND_CONFIG_H_ */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
new file mode 100644
index 0000000000000..7378bfd7b3979
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -0,0 +1,34 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "backend/gpu/mali_kbase_cache_policy_backend.h"
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode)
+{
+	kbdev->current_gpu_coherency_mode = mode;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
+		kbase_reg_write(kbdev, COHERENCY_ENABLE, mode);
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
new file mode 100644
index 0000000000000..f78ada74f605e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#ifndef _KBASE_CACHE_POLICY_BACKEND_H_
+#define _KBASE_CACHE_POLICY_BACKEND_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+  * kbase_cache_set_coherency_mode() - Sets the system coherency mode
+  *			in the GPU.
+  * @kbdev:	Device pointer
+  * @mode:	Coherency mode. COHERENCY_ACE/ACE_LITE
+  */
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
new file mode 100644
index 0000000000000..450f6e750a0c7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
@@ -0,0 +1,162 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_debug_job_fault.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+/*GPU_CONTROL_REG(r)*/
+static int gpu_control_reg_snapshot[] = {
+	GPU_ID,
+	SHADER_READY_LO,
+	SHADER_READY_HI,
+	TILER_READY_LO,
+	TILER_READY_HI,
+	L2_READY_LO,
+	L2_READY_HI
+};
+
+/* JOB_CONTROL_REG(r) */
+static int job_control_reg_snapshot[] = {
+	JOB_IRQ_MASK,
+	JOB_IRQ_STATUS
+};
+
+/* JOB_SLOT_REG(n,r) */
+static int job_slot_reg_snapshot[] = {
+	JS_HEAD_LO,
+	JS_HEAD_HI,
+	JS_TAIL_LO,
+	JS_TAIL_HI,
+	JS_AFFINITY_LO,
+	JS_AFFINITY_HI,
+	JS_CONFIG,
+	JS_STATUS,
+	JS_HEAD_NEXT_LO,
+	JS_HEAD_NEXT_HI,
+	JS_AFFINITY_NEXT_LO,
+	JS_AFFINITY_NEXT_HI,
+	JS_CONFIG_NEXT
+};
+
+/*MMU_REG(r)*/
+static int mmu_reg_snapshot[] = {
+	MMU_IRQ_MASK,
+	MMU_IRQ_STATUS
+};
+
+/* MMU_AS_REG(n,r) */
+static int as_reg_snapshot[] = {
+	AS_TRANSTAB_LO,
+	AS_TRANSTAB_HI,
+	AS_MEMATTR_LO,
+	AS_MEMATTR_HI,
+	AS_FAULTSTATUS,
+	AS_FAULTADDRESS_LO,
+	AS_FAULTADDRESS_HI,
+	AS_STATUS
+};
+
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range)
+{
+	int i, j;
+	int offset = 0;
+	int slot_number;
+	int as_number;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	slot_number = kctx->kbdev->gpu_props.num_job_slots;
+	as_number = kctx->kbdev->gpu_props.num_address_spaces;
+
+	/* get the GPU control registers*/
+	for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				GPU_CONTROL_REG(gpu_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job control registers*/
+	for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] =
+				JOB_CONTROL_REG(job_control_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Job Slot registers*/
+	for (j = 0; j < slot_number; j++)	{
+		for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+			JOB_SLOT_REG(j, job_slot_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	/* get the MMU registers*/
+	for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) {
+		kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]);
+		offset += 2;
+	}
+
+	/* get the Address space registers*/
+	for (j = 0; j < as_number; j++) {
+		for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) {
+			kctx->reg_dump[offset] =
+					MMU_AS_REG(j, as_reg_snapshot[i]);
+			offset += 2;
+		}
+	}
+
+	WARN_ON(offset >= (reg_range*2/4));
+
+	/* set the termination flag*/
+	kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG;
+	kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n",
+			offset);
+
+	return true;
+}
+
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx)
+{
+	int offset = 0;
+
+	if (kctx->reg_dump == NULL)
+		return false;
+
+	while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
+		kctx->reg_dump[offset+1] =
+				kbase_reg_read(kctx->kbdev,
+						kctx->reg_dump[offset]);
+		offset += 2;
+	}
+	return true;
+}
+
+
+#endif
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
new file mode 100644
index 0000000000000..4fd105d8a2541
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
@@ -0,0 +1,447 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp opp
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp_get_opp_count opp_get_opp_count
+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
+#define dev_pm_opp_find_freq_floor opp_find_freq_floor
+#endif /* Linux >= 3.13 */
+
+/**
+ * opp_translate - Translate nominal OPP frequency from devicetree into real
+ *                 frequency and core mask
+ * @kbdev:     Device pointer
+ * @freq:      Nominal frequency
+ * @core_mask: Pointer to u64 to store core mask to
+ *
+ * Return: Real target frequency
+ *
+ * This function will only perform translation if an operating-points-v2-mali
+ * table is present in devicetree. If one is not present then it will return an
+ * untranslated frequency and all cores enabled.
+ */
+static unsigned long opp_translate(struct kbase_device *kbdev,
+		unsigned long freq, u64 *core_mask)
+{
+	int i;
+
+	for (i = 0; i < kbdev->num_opps; i++) {
+		if (kbdev->opp_table[i].opp_freq == freq) {
+			*core_mask = kbdev->opp_table[i].core_mask;
+			return kbdev->opp_table[i].real_freq;
+		}
+	}
+
+	/* Failed to find OPP - return all cores enabled & nominal frequency */
+	*core_mask = kbdev->gpu_props.props.raw_props.shader_present;
+
+	return freq;
+}
+
+static int
+kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long nominal_freq;
+	unsigned long freq = 0;
+	unsigned long voltage;
+	int err;
+	u64 core_mask;
+
+	freq = *target_freq;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_lock();
+#endif
+	opp = devfreq_recommended_opp(dev, &freq, flags);
+	voltage = dev_pm_opp_get_voltage(opp);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_unlock();
+#endif
+	if (IS_ERR_OR_NULL(opp)) {
+		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+		return PTR_ERR(opp);
+	}
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+	dev_pm_opp_put(opp);
+#endif
+
+	nominal_freq = freq;
+
+	/*
+	 * Only update if there is a change of frequency
+	 */
+	if (kbdev->current_nominal_freq == nominal_freq) {
+		*target_freq = nominal_freq;
+		return 0;
+	}
+
+	freq = opp_translate(kbdev, nominal_freq, &core_mask);
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq < freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to increase voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	err = clk_set_rate(kbdev->clock, freq);
+	if (err) {
+		dev_err(dev, "Failed to set clock %lu (target %lu)\n",
+				freq, *target_freq);
+		return err;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (kbdev->regulator && kbdev->current_voltage != voltage
+			&& kbdev->current_freq > freq) {
+		err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+		if (err) {
+			dev_err(dev, "Failed to decrease voltage (%d)\n", err);
+			return err;
+		}
+	}
+#endif
+
+	kbase_devfreq_set_core_mask(kbdev, core_mask);
+
+	*target_freq = nominal_freq;
+	kbdev->current_voltage = voltage;
+	kbdev->current_nominal_freq = nominal_freq;
+	kbdev->current_freq = freq;
+	kbdev->current_core_mask = core_mask;
+
+	KBASE_TLSTREAM_AUX_DEVFREQ_TARGET((u64)nominal_freq);
+
+	return err;
+}
+
+static int
+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	*freq = kbdev->current_nominal_freq;
+
+	return 0;
+}
+
+static int
+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+	struct kbasep_pm_metrics diff;
+
+	kbase_pm_get_dvfs_metrics(kbdev, &kbdev->last_devfreq_metrics, &diff);
+
+	stat->busy_time = diff.time_busy;
+	stat->total_time = diff.time_busy + diff.time_idle;
+	stat->current_frequency = kbdev->current_nominal_freq;
+	stat->private_data = NULL;
+
+	return 0;
+}
+
+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
+		struct devfreq_dev_profile *dp)
+{
+	int count;
+	int i = 0;
+	unsigned long freq;
+	struct dev_pm_opp *opp;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_lock();
+#endif
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_unlock();
+#endif
+	if (count < 0)
+		return count;
+
+	dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+				GFP_KERNEL);
+	if (!dp->freq_table)
+		return -ENOMEM;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_lock();
+#endif
+	for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) {
+		opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq);
+		if (IS_ERR(opp))
+			break;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+		dev_pm_opp_put(opp);
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) */
+
+		dp->freq_table[i] = freq;
+	}
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+	rcu_read_unlock();
+#endif
+
+	if (count != i)
+		dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n",
+				count, i);
+
+	dp->max_state = i;
+
+	return 0;
+}
+
+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp;
+	if (!IS_ERR_OR_NULL(kbdev->devfreq)) {
+		dp = kbdev->devfreq->profile;
+		if (!IS_ERR_OR_NULL(dp))
+			kfree(dp->freq_table);
+	}
+}
+
+static void kbase_devfreq_exit(struct device *dev)
+{
+	struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+	kbase_devfreq_term_freq_table(kbdev);
+}
+
+static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
+{
+	struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node,
+			"operating-points-v2", 0);
+	struct device_node *node;
+	int i = 0;
+	int count;
+	u64 shader_present = kbdev->gpu_props.props.raw_props.shader_present;
+
+	if (!opp_node)
+		return 0;
+	if (!of_device_is_compatible(opp_node, "operating-points-v2-mali"))
+		return 0;
+
+	count = dev_pm_opp_get_opp_count(kbdev->dev);
+	kbdev->opp_table = kmalloc_array(count,
+			sizeof(struct kbase_devfreq_opp), GFP_KERNEL);
+	if (!kbdev->opp_table)
+		return -ENOMEM;
+
+	for_each_available_child_of_node(opp_node, node) {
+		u64 core_mask;
+		u64 opp_freq, real_freq;
+		const void *core_count_p;
+
+		if (of_property_read_u64(node, "opp-hz", &opp_freq)) {
+			dev_warn(kbdev->dev, "OPP is missing required opp-hz property\n");
+			continue;
+		}
+		if (of_property_read_u64(node, "opp-hz-real", &real_freq))
+			real_freq = opp_freq;
+		if (of_property_read_u64(node, "opp-core-mask", &core_mask))
+			core_mask = shader_present;
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11056) &&
+				core_mask != shader_present) {
+			dev_warn(kbdev->dev, "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n",
+					opp_freq);
+			continue;
+		}
+
+		core_count_p = of_get_property(node, "opp-core-count", NULL);
+		if (core_count_p) {
+			u64 remaining_core_mask =
+				kbdev->gpu_props.props.raw_props.shader_present;
+			int core_count = be32_to_cpup(core_count_p);
+
+			core_mask = 0;
+
+			for (; core_count > 0; core_count--) {
+				int core = ffs(remaining_core_mask);
+
+				if (!core) {
+					dev_err(kbdev->dev, "OPP has more cores than GPU\n");
+					return -ENODEV;
+				}
+
+				core_mask |= (1ull << (core-1));
+				remaining_core_mask &= ~(1ull << (core-1));
+			}
+		}
+
+		if (!core_mask) {
+			dev_err(kbdev->dev, "OPP has invalid core mask of 0\n");
+			return -ENODEV;
+		}
+
+		kbdev->opp_table[i].opp_freq = opp_freq;
+		kbdev->opp_table[i].real_freq = real_freq;
+		kbdev->opp_table[i].core_mask = core_mask;
+
+		dev_info(kbdev->dev, "OPP %d : opp_freq=%llu real_freq=%llu core_mask=%llx\n",
+				i, opp_freq, real_freq, core_mask);
+
+		i++;
+	}
+
+	kbdev->num_opps = i;
+
+	return 0;
+}
+
+int kbase_devfreq_init(struct kbase_device *kbdev)
+{
+	struct devfreq_dev_profile *dp;
+	int err;
+
+	if (!kbdev->clock) {
+		dev_err(kbdev->dev, "Clock not available for devfreq\n");
+		return -ENODEV;
+	}
+
+	kbdev->current_freq = clk_get_rate(kbdev->clock);
+	kbdev->current_nominal_freq = kbdev->current_freq;
+
+	dp = &kbdev->devfreq_profile;
+
+	dp->initial_freq = kbdev->current_freq;
+	dp->polling_ms = 100;
+	dp->target = kbase_devfreq_target;
+	dp->get_dev_status = kbase_devfreq_status;
+	dp->get_cur_freq = kbase_devfreq_cur_freq;
+	dp->exit = kbase_devfreq_exit;
+
+	if (kbase_devfreq_init_freq_table(kbdev, dp))
+		return -EFAULT;
+
+	if (dp->max_state > 0) {
+		/* Record the maximum frequency possible */
+		kbdev->gpu_props.props.core_props.gpu_freq_khz_max =
+			dp->freq_table[0] / 1000;
+	};
+
+	err = kbase_devfreq_init_core_mask_table(kbdev);
+	if (err)
+		return err;
+
+	kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
+				"simple_ondemand", NULL);
+	if (IS_ERR(kbdev->devfreq)) {
+		kfree(dp->freq_table);
+		return PTR_ERR(kbdev->devfreq);
+	}
+
+	/* devfreq_add_device only copies a few of kbdev->dev's fields, so
+	 * set drvdata explicitly so IPA models can access kbdev. */
+	dev_set_drvdata(&kbdev->devfreq->dev, kbdev);
+
+	err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
+	if (err) {
+		dev_err(kbdev->dev,
+			"Failed to register OPP notifier (%d)\n", err);
+		goto opp_notifier_failed;
+	}
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	err = kbase_ipa_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "IPA initialization failed\n");
+		goto cooling_failed;
+	}
+
+	kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
+			kbdev->dev->of_node,
+			kbdev->devfreq,
+			&kbase_ipa_power_model_ops);
+	if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
+		err = PTR_ERR(kbdev->devfreq_cooling);
+		dev_err(kbdev->dev,
+			"Failed to register cooling device (%d)\n",
+			err);
+		goto cooling_failed;
+	}
+#endif
+
+	return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+	if (devfreq_remove_device(kbdev->devfreq))
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+
+	return err;
+}
+
+void kbase_devfreq_term(struct kbase_device *kbdev)
+{
+	int err;
+
+	dev_dbg(kbdev->dev, "Term Mali devfreq\n");
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->devfreq_cooling)
+		devfreq_cooling_unregister(kbdev->devfreq_cooling);
+
+	kbase_ipa_term(kbdev);
+#endif
+
+	devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+
+	err = devfreq_remove_device(kbdev->devfreq);
+	if (err)
+		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+	else
+		kbdev->devfreq = NULL;
+
+	kfree(kbdev->opp_table);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
new file mode 100644
index 0000000000000..0634038c5feed
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _BASE_DEVFREQ_H_
+#define _BASE_DEVFREQ_H_
+
+int kbase_devfreq_init(struct kbase_device *kbdev);
+void kbase_devfreq_term(struct kbase_device *kbdev);
+
+#endif /* _BASE_DEVFREQ_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
new file mode 100644
index 0000000000000..ebc3022299b58
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
@@ -0,0 +1,250 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ *
+ */
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+
+#ifdef CONFIG_DEBUG_FS
+
+
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size)
+{
+	struct kbase_io_access *old_buf;
+	struct kbase_io_access *new_buf;
+	unsigned long flags;
+
+	if (!new_size)
+		goto out_err; /* The new size must not be 0 */
+
+	new_buf = vmalloc(new_size * sizeof(*h->buf));
+	if (!new_buf)
+		goto out_err;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	old_buf = h->buf;
+
+	/* Note: we won't bother with copying the old data over. The dumping
+	 * logic wouldn't work properly as it relies on 'count' both as a
+	 * counter and as an index to the buffer which would have changed with
+	 * the new array. This is a corner case that we don't need to support.
+	 */
+	h->count = 0;
+	h->size = new_size;
+	h->buf = new_buf;
+
+	spin_unlock_irqrestore(&h->lock, flags);
+
+	vfree(old_buf);
+
+	return 0;
+
+out_err:
+	return -1;
+}
+
+
+int kbase_io_history_init(struct kbase_io_history *h, u16 n)
+{
+	h->enabled = false;
+	spin_lock_init(&h->lock);
+	h->count = 0;
+	h->size = 0;
+	h->buf = NULL;
+	if (kbase_io_history_resize(h, n))
+		return -1;
+
+	return 0;
+}
+
+
+void kbase_io_history_term(struct kbase_io_history *h)
+{
+	vfree(h->buf);
+	h->buf = NULL;
+}
+
+
+/* kbase_io_history_add - add new entry to the register access history
+ *
+ * @h: Pointer to the history data structure
+ * @addr: Register address
+ * @value: The value that is either read from or written to the register
+ * @write: 1 if it's a register write, 0 if it's a read
+ */
+static void kbase_io_history_add(struct kbase_io_history *h,
+		void __iomem const *addr, u32 value, u8 write)
+{
+	struct kbase_io_access *io;
+	unsigned long flags;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	io = &h->buf[h->count % h->size];
+	io->addr = (uintptr_t)addr | write;
+	io->value = value;
+	++h->count;
+	/* If count overflows, move the index by the buffer size so the entire
+	 * buffer will still be dumped later */
+	if (unlikely(!h->count))
+		h->count = h->size;
+
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
+
+void kbase_io_history_dump(struct kbase_device *kbdev)
+{
+	struct kbase_io_history *const h = &kbdev->io_history;
+	u16 i;
+	size_t iters;
+	unsigned long flags;
+
+	if (!unlikely(h->enabled))
+		return;
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	dev_err(kbdev->dev, "Register IO History:");
+	iters = (h->size > h->count) ? h->count : h->size;
+	dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters,
+			h->count);
+	for (i = 0; i < iters; ++i) {
+		struct kbase_io_access *io =
+			&h->buf[(h->count - iters + i) % h->size];
+		char const access = (io->addr & 1) ? 'w' : 'r';
+
+		dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access,
+				(void *)(io->addr & ~0x1), io->value);
+	}
+
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
+
+
+void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
+{
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	writel(value, kbdev->reg + offset);
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+				value, 1);
+#endif /* CONFIG_DEBUG_FS */
+	dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value);
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_write);
+
+u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
+{
+	u32 val;
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
+	val = readl(kbdev->reg + offset);
+
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbdev->io_history.enabled))
+		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
+				val, 0);
+#endif /* CONFIG_DEBUG_FS */
+	dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val);
+
+	return val;
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_read);
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
+
+/**
+ * kbase_report_gpu_fault - Report a GPU fault.
+ * @kbdev:    Kbase device pointer
+ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS
+ *            was also set
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using dev_warn().
+ */
+static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
+{
+	u32 status;
+	u64 address;
+
+	status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS));
+	address = (u64) kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32;
+	address |= kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(GPU_FAULTADDRESS_LO));
+
+	dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
+			status & 0xFF,
+			kbase_exception_name(kbdev, status),
+			address);
+	if (multiple)
+		dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
+}
+
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
+{
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
+	if (val & GPU_FAULT)
+		kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
+
+	if (val & RESET_COMPLETED)
+		kbase_pm_reset_done(kbdev);
+
+	if (val & PRFCNT_SAMPLE_COMPLETED)
+		kbase_instr_hwcnt_sample_done(kbdev);
+
+	if (val & CLEAN_CACHES_COMPLETED)
+		kbase_clean_caches_done(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
+
+	/* kbase_pm_check_transitions must be called after the IRQ has been
+	 * cleared. This is because it might trigger further power transitions
+	 * and we don't want to miss the interrupt raised to notify us that
+	 * these further transitions have finished.
+	 */
+	if (val & POWER_CHANGED_ALL)
+		kbase_pm_power_changed(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
new file mode 100644
index 0000000000000..928efe950260c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Backend-specific HW access device APIs
+ */
+
+#ifndef _KBASE_DEVICE_INTERNAL_H_
+#define _KBASE_DEVICE_INTERNAL_H_
+
+/**
+ * kbase_reg_write - write to GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ * @value:  Value to write
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false).
+ */
+void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value);
+
+/**
+ * kbase_reg_read - read from GPU register
+ * @kbdev:  Kbase device pointer
+ * @offset: Offset of register
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false).
+ *
+ * Return: Value in desired register
+ */
+u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset);
+
+
+/**
+ * kbase_gpu_interrupt - GPU interrupt handler
+ * @kbdev: Kbase device pointer
+ * @val:   The value of the GPU IRQ status register which triggered the call
+ *
+ * This function is called from the interrupt handler when a GPU irq is to be
+ * handled.
+ */
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val);
+
+#endif /* _KBASE_DEVICE_INTERNAL_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
new file mode 100644
index 0000000000000..881d50c14af8b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
@@ -0,0 +1,135 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register-based HW access backend APIs
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_backend.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+int kbase_backend_early_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbasep_platform_device_init(kbdev);
+	if (err)
+		return err;
+
+	err = kbase_pm_runtime_init(kbdev);
+	if (err)
+		goto fail_runtime_pm;
+
+	/* Ensure we can access the GPU registers */
+	kbase_pm_register_access_enable(kbdev);
+
+	/* Find out GPU properties based on the GPU feature registers */
+	kbase_gpuprops_set(kbdev);
+
+	/* We're done accessing the GPU registers for now. */
+	kbase_pm_register_access_disable(kbdev);
+
+	err = kbase_install_interrupts(kbdev);
+	if (err)
+		goto fail_interrupts;
+
+	err = kbase_hwaccess_pm_init(kbdev);
+	if (err)
+		goto fail_pm;
+
+	return 0;
+
+fail_pm:
+	kbase_release_interrupts(kbdev);
+fail_interrupts:
+	kbase_pm_runtime_term(kbdev);
+fail_runtime_pm:
+	kbasep_platform_device_term(kbdev);
+
+	return err;
+}
+
+void kbase_backend_early_term(struct kbase_device *kbdev)
+{
+	kbase_hwaccess_pm_term(kbdev);
+	kbase_release_interrupts(kbdev);
+	kbase_pm_runtime_term(kbdev);
+	kbasep_platform_device_term(kbdev);
+}
+
+int kbase_backend_late_init(struct kbase_device *kbdev)
+{
+	int err;
+
+	err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+	if (err)
+		return err;
+
+	err = kbase_backend_timer_init(kbdev);
+	if (err)
+		goto fail_timer;
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
+		dev_err(kbdev->dev, "Interrupt assigment check failed.\n");
+		err = -EINVAL;
+		goto fail_interrupt_test;
+	}
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	err = kbase_job_slot_init(kbdev);
+	if (err)
+		goto fail_job_slot;
+
+	init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
+
+	return 0;
+
+fail_job_slot:
+
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+fail_interrupt_test:
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+	kbase_backend_timer_term(kbdev);
+fail_timer:
+	kbase_hwaccess_pm_halt(kbdev);
+
+	return err;
+}
+
+void kbase_backend_late_term(struct kbase_device *kbdev)
+{
+	kbase_job_slot_halt(kbdev);
+	kbase_job_slot_term(kbdev);
+	kbase_backend_timer_term(kbdev);
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
new file mode 100644
index 0000000000000..39773e6e63aa4
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base kernel property query backend APIs
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	int i;
+
+	/* Fill regdump with the content of the relevant registers */
+	regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));
+
+	regdump->l2_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_FEATURES));
+	regdump->core_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(CORE_FEATURES));
+	regdump->tiler_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_FEATURES));
+	regdump->mem_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MEM_FEATURES));
+	regdump->mmu_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(MMU_FEATURES));
+	regdump->as_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(AS_PRESENT));
+	regdump->js_present = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_PRESENT));
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		regdump->js_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JS_FEATURES_REG(i)));
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		regdump->texture_features[i] = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)));
+
+	regdump->thread_max_threads = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_THREADS));
+	regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE));
+	regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE));
+	regdump->thread_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_FEATURES));
+	regdump->thread_tls_alloc = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(THREAD_TLS_ALLOC));
+
+	regdump->shader_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_LO));
+	regdump->shader_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_PRESENT_HI));
+
+	regdump->tiler_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_LO));
+	regdump->tiler_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_PRESENT_HI));
+
+	regdump->l2_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_LO));
+	regdump->l2_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_PRESENT_HI));
+
+	regdump->stack_present_lo = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(STACK_PRESENT_LO));
+	regdump->stack_present_hi = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(STACK_PRESENT_HI));
+}
+
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) {
+		/* Ensure we can access the GPU registers */
+		kbase_pm_register_access_enable(kbdev);
+
+		regdump->coherency_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(COHERENCY_FEATURES));
+
+		/* We're done accessing the GPU registers for now. */
+		kbase_pm_register_access_disable(kbdev);
+	} else {
+		/* Pre COHERENCY_FEATURES we only supported ACE_LITE */
+		regdump->coherency_features =
+				COHERENCY_FEATURE_BIT(COHERENCY_NONE) |
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+	}
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
new file mode 100644
index 0000000000000..6c69132a297c4
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
@@ -0,0 +1,497 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * GPU backend instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+
+/**
+ * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
+ * hardware
+ *
+ * @kbdev: Kbase device
+ */
+static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long pm_flags;
+	u32 irq_mask;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_REQUEST_CLEAN);
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask | CLEAN_CACHES_COMPLETED);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+	/* clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES);
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					struct kbase_ioctl_hwcnt_enable *enable)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	int ret;
+	u32 prfcnt_config;
+
+	/* alignment failure */
+	if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1)))
+		goto out_err;
+
+	/* Override core availability policy to ensure all cores are available
+	 */
+	kbase_pm_ca_instr_enable(kbdev);
+
+	/* Request the cores early on synchronously - we'll release them on any
+	 * errors (e.g. instrumentation already active) */
+	kbase_pm_request_cores_sync(kbdev, true, true);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		/* Instrumentation is already enabled */
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		goto out_unrequest_cores;
+	}
+
+	/* Enable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
+						PRFCNT_SAMPLE_COMPLETED);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+	/* In use, this context is the owner */
+	kbdev->hwcnt.kctx = kctx;
+	/* Remember the dump address so we can reprogram it later */
+	kbdev->hwcnt.addr = enable->dump_buffer;
+
+	/* Request the clean */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+	kbdev->hwcnt.backend.triggered = 0;
+	/* Clean&invalidate the caches so we're sure the mmu tables for the dump
+	 * buffer is valid */
+	ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+					&kbdev->hwcnt.backend.cache_clean_work);
+	KBASE_DEBUG_ASSERT(ret);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Wait for cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	/* Configure */
+	prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	{
+		u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
+			>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+		int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
+
+		if (arch_v6)
+			prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
+	}
+#endif
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_OFF);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					enable->dump_buffer & 0xFFFFFFFF);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					enable->dump_buffer >> 32);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+					enable->jm_bm);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+					enable->shader_bm);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+					enable->mmu_l2_bm);
+	/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
+	 * HW counter dump. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0);
+	else
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							enable->tiler_bm);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL);
+
+	/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+							enable->tiler_bm);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	err = 0;
+
+	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
+	return err;
+ out_unrequest_cores:
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_release_cores(kbdev, true, true);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ out_err:
+	return err;
+}
+
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
+{
+	unsigned long flags, pm_flags;
+	int err = -EINVAL;
+	u32 irq_mask;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	while (1) {
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
+			/* Instrumentation is not enabled */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.kctx != kctx) {
+			/* Instrumentation has been setup for another context */
+			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			goto out;
+		}
+
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
+			break;
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+		/* Ongoing dump/setup - wait for its completion */
+		wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+	}
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Disable interrupt */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~PRFCNT_SAMPLE_COMPLETED);
+
+	/* Disable the counters */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0);
+
+	kbdev->hwcnt.kctx = NULL;
+	kbdev->hwcnt.addr = 0ULL;
+
+	kbase_pm_ca_instr_disable(kbdev);
+
+	kbase_pm_release_cores(kbdev, true, true);
+
+	kbase_pm_release_l2_caches(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+									kctx);
+
+	err = 0;
+
+ out:
+	return err;
+}
+
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.kctx != kctx) {
+		/* The instrumentation has been setup for another context */
+		goto unlock;
+	}
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
+		/* HW counters are disabled or another dump is ongoing, or we're
+		 * resetting */
+		goto unlock;
+	}
+
+	kbdev->hwcnt.backend.triggered = 0;
+
+	/* Mark that we're dumping - the PF handler can signal that we faulted
+	 */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
+
+	/* Reconfigure the dump address */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+					kbdev->hwcnt.addr & 0xFFFFFFFF);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+					kbdev->hwcnt.addr >> 32);
+
+	/* Start dumping */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
+					kbdev->hwcnt.addr, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_PRFCNT_SAMPLE);
+
+	dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
+
+	err = 0;
+
+ unlock:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
+
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success)
+{
+	unsigned long flags;
+	bool complete = false;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
+		*success = true;
+		complete = true;
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		*success = false;
+		complete = true;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return complete;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
+
+void kbasep_cache_clean_worker(struct work_struct *data)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(data, struct kbase_device,
+						hwcnt.backend.cache_clean_work);
+
+	mutex_lock(&kbdev->cacheclean_lock);
+	kbasep_instr_hwcnt_cacheclean(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	/* Wait for our condition, and any reset to complete */
+	while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		wait_event(kbdev->hwcnt.backend.cache_clean_wait,
+				kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING);
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	}
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_CLEANED);
+
+	/* All finished and idle */
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	kbdev->hwcnt.backend.triggered = 1;
+	wake_up(&kbdev->hwcnt.backend.wait);
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		kbdev->hwcnt.backend.triggered = 1;
+		wake_up(&kbdev->hwcnt.backend.wait);
+	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
+		if (kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) {
+			/* All finished and idle */
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+			kbdev->hwcnt.backend.triggered = 1;
+			wake_up(&kbdev->hwcnt.backend.wait);
+		} else {
+			int ret;
+			/* Always clean and invalidate the cache after a successful dump
+			 */
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+			ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+						&kbdev->hwcnt.backend.cache_clean_work);
+			KBASE_DEBUG_ASSERT(ret);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+void kbase_clean_caches_done(struct kbase_device *kbdev)
+{
+	u32 irq_mask;
+
+	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+		unsigned long flags;
+		unsigned long pm_flags;
+
+		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+		/* Disable interrupt */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~CLEAN_CACHES_COMPLETED);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+		/* Wakeup... */
+		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+			/* Only wake if we weren't resetting */
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
+			wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+		}
+
+		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	}
+}
+
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long flags;
+	int err;
+
+	/* Wait for dump & cacheclean to complete */
+	wait_event(kbdev->hwcnt.backend.wait,
+					kbdev->hwcnt.backend.triggered != 0);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+		err = -EINVAL;
+		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+	} else {
+		/* Dump done */
+		KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+							KBASE_INSTR_STATE_IDLE);
+		err = 0;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	return err;
+}
+
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	/* Check it's the context previously set up and we're not already
+	 * dumping */
+	if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
+							KBASE_INSTR_STATE_IDLE)
+		goto out;
+
+	/* Clear the counters */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_PRFCNT_CLEAR);
+
+	err = 0;
+
+out:
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
+
+int kbase_instr_backend_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+
+	init_waitqueue_head(&kbdev->hwcnt.backend.wait);
+	init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
+	INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
+						kbasep_cache_clean_worker);
+	kbdev->hwcnt.backend.triggered = 0;
+
+	kbdev->hwcnt.backend.cache_clean_wq =
+			alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
+	if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
+		ret = -EINVAL;
+
+	return ret;
+}
+
+void kbase_instr_backend_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
new file mode 100644
index 0000000000000..fb55d2d56f2c6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Backend-specific instrumentation definitions
+ */
+
+#ifndef _KBASE_INSTR_DEFS_H_
+#define _KBASE_INSTR_DEFS_H_
+
+/*
+ * Instrumentation State Machine States
+ */
+enum kbase_instr_state {
+	/* State where instrumentation is not active */
+	KBASE_INSTR_STATE_DISABLED = 0,
+	/* State machine is active and ready for a command. */
+	KBASE_INSTR_STATE_IDLE,
+	/* Hardware is currently dumping a frame. */
+	KBASE_INSTR_STATE_DUMPING,
+	/* We've requested a clean to occur on a workqueue */
+	KBASE_INSTR_STATE_REQUEST_CLEAN,
+	/* Hardware is currently cleaning and invalidating caches. */
+	KBASE_INSTR_STATE_CLEANING,
+	/* Cache clean completed, and either a) a dump is complete, or
+	 * b) instrumentation can now be setup. */
+	KBASE_INSTR_STATE_CLEANED,
+	/* An error has occured during DUMPING (page fault). */
+	KBASE_INSTR_STATE_FAULT
+};
+
+/* Structure used for instrumentation and HW counters dumping */
+struct kbase_instr_backend {
+	wait_queue_head_t wait;
+	int triggered;
+
+	enum kbase_instr_state state;
+	wait_queue_head_t cache_clean_wait;
+	struct workqueue_struct *cache_clean_wq;
+	struct work_struct  cache_clean_work;
+};
+
+#endif /* _KBASE_INSTR_DEFS_H_ */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
new file mode 100644
index 0000000000000..608379e4ca0f6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Backend-specific HW access instrumentation APIs
+ */
+
+#ifndef _KBASE_INSTR_INTERNAL_H_
+#define _KBASE_INSTR_INTERNAL_H_
+
+/**
+ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning
+ * @data: a &struct work_struct
+ */
+void kbasep_cache_clean_worker(struct work_struct *data);
+
+/**
+ * kbase_clean_caches_done() - Cache clean interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_clean_caches_done(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_INTERNAL_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
new file mode 100644
index 0000000000000..ca3c048b637a0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Backend specific IRQ APIs
+ */
+
+#ifndef _KBASE_IRQ_INTERNAL_H_
+#define _KBASE_IRQ_INTERNAL_H_
+
+int kbase_install_interrupts(struct kbase_device *kbdev);
+
+void kbase_release_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed
+ *                          execution
+ * @kbdev: The kbase device
+ */
+void kbase_synchronize_irqs(struct kbase_device *kbdev);
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev);
+
+#endif /* _KBASE_IRQ_INTERNAL_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
new file mode 100644
index 0000000000000..dd0279a03abc3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
@@ -0,0 +1,474 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+
+#include <linux/interrupt.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+static void *kbase_tag(void *ptr, u32 tag)
+{
+	return (void *)(((uintptr_t) ptr) | tag);
+}
+
+static void *kbase_untag(void *ptr)
+{
+	return (void *)(((uintptr_t) ptr) & ~3);
+}
+
+static irqreturn_t kbase_job_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_job_done(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
+
+static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	atomic_inc(&kbdev->faults_pending);
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS));
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val) {
+		atomic_dec(&kbdev->faults_pending);
+		return IRQ_NONE;
+	}
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_mmu_interrupt(kbdev, val);
+
+	atomic_dec(&kbdev->faults_pending);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS));
+
+#ifdef CONFIG_MALI_DEBUG
+	if (!kbdev->pm.backend.driver_ready_for_irqs)
+		dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+				__func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbase_gpu_interrupt(kbdev, val);
+
+	return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler);
+
+static irq_handler_t kbase_handler_table[] = {
+	[JOB_IRQ_TAG] = kbase_job_irq_handler,
+	[MMU_IRQ_TAG] = kbase_mmu_irq_handler,
+	[GPU_IRQ_TAG] = kbase_gpu_irq_handler,
+};
+
+#ifdef CONFIG_MALI_DEBUG
+#define  JOB_IRQ_HANDLER JOB_IRQ_TAG
+#define  MMU_IRQ_HANDLER MMU_IRQ_TAG
+#define  GPU_IRQ_HANDLER GPU_IRQ_TAG
+
+/**
+ * kbase_set_custom_irq_handler - Set a custom IRQ handler
+ * @kbdev: Device for which the handler is to be registered
+ * @custom_handler: Handler to be registered
+ * @irq_type: Interrupt type
+ *
+ * Registers given interrupt handler for requested interrupt type
+ * In the case where irq handler is not specified, the default handler shall be
+ * registered
+ *
+ * Return: 0 case success, error code otherwise
+ */
+int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+					irq_handler_t custom_handler,
+					int irq_type)
+{
+	int result = 0;
+	irq_handler_t requested_irq_handler = NULL;
+
+	KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) &&
+						(GPU_IRQ_HANDLER >= irq_type));
+
+	/* Release previous handler */
+	if (kbdev->irqs[irq_type].irq)
+		free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type));
+
+	requested_irq_handler = (NULL != custom_handler) ? custom_handler :
+						kbase_handler_table[irq_type];
+
+	if (0 != request_irq(kbdev->irqs[irq_type].irq,
+			requested_irq_handler,
+			kbdev->irqs[irq_type].flags | IRQF_SHARED,
+			dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) {
+		result = -EINVAL;
+		dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+					kbdev->irqs[irq_type].irq, irq_type);
+#ifdef CONFIG_SPARSE_IRQ
+		dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler);
+
+/* test correct interrupt assigment and reception by cpu */
+struct kbasep_irq_test {
+	struct hrtimer timer;
+	wait_queue_head_t wait;
+	int triggered;
+	u32 timeout;
+};
+
+static struct kbasep_irq_test kbasep_irq_test_data;
+
+#define IRQ_TEST_TIMEOUT    500
+
+static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* GPU is turned off - IRQ is not for us */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return IRQ_NONE;
+	}
+
+	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS));
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (!val)
+		return IRQ_NONE;
+
+	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+	kbasep_irq_test_data.triggered = 1;
+	wake_up(&kbasep_irq_test_data.wait);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val);
+
+	return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
+{
+	struct kbasep_irq_test *test_data = container_of(timer,
+						struct kbasep_irq_test, timer);
+
+	test_data->timeout = 1;
+	test_data->triggered = 1;
+	wake_up(&test_data->wait);
+	return HRTIMER_NORESTART;
+}
+
+static int kbasep_common_test_interrupt(
+				struct kbase_device * const kbdev, u32 tag)
+{
+	int err = 0;
+	irq_handler_t test_handler;
+
+	u32 old_mask_val;
+	u16 mask_offset;
+	u16 rawstat_offset;
+
+	switch (tag) {
+	case JOB_IRQ_TAG:
+		test_handler = kbase_job_irq_test_handler;
+		rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT);
+		mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK);
+		break;
+	case MMU_IRQ_TAG:
+		test_handler = kbase_mmu_irq_test_handler;
+		rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
+		mask_offset = MMU_REG(MMU_IRQ_MASK);
+		break;
+	case GPU_IRQ_TAG:
+		/* already tested by pm_driver - bail out */
+	default:
+		return 0;
+	}
+
+	/* store old mask */
+	old_mask_val = kbase_reg_read(kbdev, mask_offset);
+	/* mask interrupts */
+	kbase_reg_write(kbdev, mask_offset, 0x0);
+
+	if (kbdev->irqs[tag].irq) {
+		/* release original handler and install test handler */
+		if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) {
+			err = -EINVAL;
+		} else {
+			kbasep_irq_test_data.timeout = 0;
+			hrtimer_init(&kbasep_irq_test_data.timer,
+					CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+			kbasep_irq_test_data.timer.function =
+						kbasep_test_interrupt_timeout;
+
+			/* trigger interrupt */
+			kbase_reg_write(kbdev, mask_offset, 0x1);
+			kbase_reg_write(kbdev, rawstat_offset, 0x1);
+
+			hrtimer_start(&kbasep_irq_test_data.timer,
+					HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
+					HRTIMER_MODE_REL);
+
+			wait_event(kbasep_irq_test_data.wait,
+					kbasep_irq_test_data.triggered != 0);
+
+			if (kbasep_irq_test_data.timeout != 0) {
+				dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+				err = -EINVAL;
+			} else {
+				dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n",
+						kbdev->irqs[tag].irq, tag);
+			}
+
+			hrtimer_cancel(&kbasep_irq_test_data.timer);
+			kbasep_irq_test_data.triggered = 0;
+
+			/* mask interrupts */
+			kbase_reg_write(kbdev, mask_offset, 0x0);
+
+			/* release test handler */
+			free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
+		}
+
+		/* restore original interrupt */
+		if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag],
+				kbdev->irqs[tag].flags | IRQF_SHARED,
+				dev_name(kbdev->dev), kbase_tag(kbdev, tag))) {
+			dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n",
+						kbdev->irqs[tag].irq, tag);
+			err = -EINVAL;
+		}
+	}
+	/* restore old mask */
+	kbase_reg_write(kbdev, mask_offset, old_mask_val);
+
+	return err;
+}
+
+int kbasep_common_test_interrupt_handlers(
+					struct kbase_device * const kbdev)
+{
+	int err;
+
+	init_waitqueue_head(&kbasep_irq_test_data.wait);
+	kbasep_irq_test_data.triggered = 0;
+
+	/* A suspend won't happen during startup/insmod */
+	kbase_pm_context_active(kbdev);
+
+	err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG);
+	if (err) {
+		dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
+		goto out;
+	}
+
+	dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n");
+
+ out:
+	kbase_pm_context_idle(kbdev);
+
+	return err;
+}
+#endif /* CONFIG_MALI_DEBUG */
+
+int kbase_install_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	int err;
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i],
+				kbdev->irqs[i].flags | IRQF_SHARED,
+				dev_name(kbdev->dev),
+				kbase_tag(kbdev, i));
+		if (err) {
+			dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+							kbdev->irqs[i].irq, i);
+#ifdef CONFIG_SPARSE_IRQ
+			dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+			goto release;
+		}
+	}
+
+	return 0;
+
+ release:
+	while (i-- > 0)
+		free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+
+	return err;
+}
+
+void kbase_release_interrupts(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+	}
+}
+
+void kbase_synchronize_irqs(struct kbase_device *kbdev)
+{
+	u32 nr = ARRAY_SIZE(kbase_handler_table);
+	u32 i;
+
+	for (i = 0; i < nr; i++) {
+		if (kbdev->irqs[i].irq)
+			synchronize_irq(kbdev->irqs[i].irq);
+	}
+}
+
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
new file mode 100644
index 0000000000000..c8153ba4c1216
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
@@ -0,0 +1,244 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register backend context / address space management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_ctx_sched.h>
+
+/**
+ * assign_and_activate_kctx_addr_space - Assign an AS to a context
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ * @current_as: Address Space to assign
+ *
+ * Assign an Address Space (AS) to a context, and add the context to the Policy.
+ *
+ * This includes
+ *   setting up the global runpool_irq structure and the context on the AS,
+ *   Activating the MMU on the AS,
+ *   Allowing jobs to be submitted on the AS.
+ *
+ * Context:
+ *   kbasep_js_kctx_info.jsctx_mutex held,
+ *   kbasep_js_device_data.runpool_mutex held,
+ *   AS transaction mutex held,
+ *   Runpool IRQ lock held
+ */
+static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_as *current_as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Attribute handling */
+	kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx);
+
+	/* Allow it to run jobs */
+	kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+	kbase_js_runpool_inc_context_count(kbdev, kctx);
+}
+
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	int i;
+
+	if (kbdev->hwaccess.active_kctx[js] == kctx) {
+		/* Context is already active */
+		return true;
+	}
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		if (kbdev->as_to_kctx[i] == kctx) {
+			/* Context already has ASID - mark as active */
+			return true;
+		}
+	}
+
+	/* Context does not have address space assigned */
+	return false;
+}
+
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	int as_nr = kctx->as_nr;
+
+	if (as_nr == KBASEP_AS_NR_INVALID) {
+		WARN(1, "Attempting to release context without ASID\n");
+		return;
+	}
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (atomic_read(&kctx->refcount) != 1) {
+		WARN(1, "Attempting to release active ASID\n");
+		return;
+	}
+
+	kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx);
+
+	kbase_ctx_sched_release_ctx(kctx);
+	kbase_js_runpool_dec_context_count(kbdev, kctx);
+}
+
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+}
+
+int kbase_backend_find_and_release_free_address_space(
+		struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+	int i;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbasep_js_kctx_info *as_js_kctx_info;
+		struct kbase_context *as_kctx;
+
+		as_kctx = kbdev->as_to_kctx[i];
+		as_js_kctx_info = &as_kctx->jctx.sched_info;
+
+		/* Don't release privileged or active contexts, or contexts with
+		 * jobs running.
+		 * Note that a context will have at least 1 reference (which
+		 * was previously taken by kbasep_js_schedule_ctx()) until
+		 * descheduled.
+		 */
+		if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) &&
+			atomic_read(&as_kctx->refcount) == 1) {
+			if (!kbasep_js_runpool_retain_ctx_nolock(kbdev,
+								as_kctx)) {
+				WARN(1, "Failed to retain active context\n");
+
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+				return KBASEP_AS_NR_INVALID;
+			}
+
+			kbasep_js_clear_submit_allowed(js_devdata, as_kctx);
+
+			/* Drop and retake locks to take the jsctx_mutex on the
+			 * context we're about to release without violating lock
+			 * ordering
+			 */
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+
+			/* Release context from address space */
+			mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+
+			kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
+
+			if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) {
+				kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
+								as_kctx,
+								true);
+
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+				return i;
+			}
+
+			/* Context was retained while locks were dropped,
+			 * continue looking for free AS */
+
+			mutex_unlock(&js_devdata->runpool_mutex);
+			mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+			mutex_lock(&js_devdata->runpool_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	return KBASEP_AS_NR_INVALID;
+}
+
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_as *new_address_space = NULL;
+	int js;
+
+	js_devdata = &kbdev->js_data;
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		if (kbdev->hwaccess.active_kctx[js] == kctx) {
+			WARN(1, "Context is already scheduled in\n");
+			return false;
+		}
+	}
+
+	new_address_space = &kbdev->as[as_nr];
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space);
+
+	if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+		/* We need to retain it to keep the corresponding address space
+		 */
+		kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	}
+
+	return true;
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
new file mode 100644
index 0000000000000..b4d2ae1cc4e82
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register-based HW access backend specific definitions
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_DEFS_H_
+#define _KBASE_HWACCESS_GPU_DEFS_H_
+
+/* SLOT_RB_SIZE must be < 256 */
+#define SLOT_RB_SIZE 2
+#define SLOT_RB_MASK (SLOT_RB_SIZE - 1)
+
+/**
+ * struct rb_entry - Ringbuffer entry
+ * @katom:	Atom associated with this entry
+ */
+struct rb_entry {
+	struct kbase_jd_atom *katom;
+};
+
+/**
+ * struct slot_rb - Slot ringbuffer
+ * @entries:		Ringbuffer entries
+ * @last_context:	The last context to submit a job on this slot
+ * @read_idx:		Current read index of buffer
+ * @write_idx:		Current write index of buffer
+ * @job_chain_flag:	Flag used to implement jobchain disambiguation
+ */
+struct slot_rb {
+	struct rb_entry entries[SLOT_RB_SIZE];
+
+	struct kbase_context *last_context;
+
+	u8 read_idx;
+	u8 write_idx;
+
+	u8 job_chain_flag;
+};
+
+/**
+ * struct kbase_backend_data - GPU backend specific data for HW access layer
+ * @slot_rb:			Slot ringbuffers
+ * @rmu_workaround_flag:	When PRLAM-8987 is present, this flag determines
+ *				whether slots 0/1 or slot 2 are currently being
+ *				pulled from
+ * @scheduling_timer:		The timer tick used for rescheduling jobs
+ * @timer_running:		Is the timer running? The runpool_mutex must be
+ *				held whilst modifying this.
+ * @suspend_timer:              Is the timer suspended? Set when a suspend
+ *                              occurs and cleared on resume. The runpool_mutex
+ *                              must be held whilst modifying this.
+ * @reset_gpu:			Set to a KBASE_RESET_xxx value (see comments)
+ * @reset_workq:		Work queue for performing the reset
+ * @reset_work:			Work item for performing the reset
+ * @reset_wait:			Wait event signalled when the reset is complete
+ * @reset_timer:		Timeout for soft-stops before the reset
+ * @timeouts_updated:           Have timeout values just been updated?
+ *
+ * The hwaccess_lock (a spinlock) must be held when accessing this structure
+ */
+struct kbase_backend_data {
+	struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
+
+	bool rmu_workaround_flag;
+
+	struct hrtimer scheduling_timer;
+
+	bool timer_running;
+	bool suspend_timer;
+
+	atomic_t reset_gpu;
+
+/* The GPU reset isn't pending */
+#define KBASE_RESET_GPU_NOT_PENDING     0
+/* kbase_prepare_to_reset_gpu has been called */
+#define KBASE_RESET_GPU_PREPARED        1
+/* kbase_reset_gpu has been called - the reset will now definitely happen
+ * within the timeout period */
+#define KBASE_RESET_GPU_COMMITTED       2
+/* The GPU reset process is currently occuring (timeout has expired or
+ * kbasep_try_reset_gpu_early was called) */
+#define KBASE_RESET_GPU_HAPPENING       3
+/* Reset the GPU silently, used when resetting the GPU as part of normal
+ * behavior (e.g. when exiting protected mode). */
+#define KBASE_RESET_GPU_SILENT          4
+	struct workqueue_struct *reset_workq;
+	struct work_struct reset_work;
+	wait_queue_head_t reset_wait;
+	struct hrtimer reset_timer;
+
+	bool timeouts_updated;
+};
+
+#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
new file mode 100644
index 0000000000000..fee19aa803afd
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
@@ -0,0 +1,1485 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Base kernel job manager APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_vinstr.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_ctx_sched.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+#define beenthere(kctx, f, a...) \
+			dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if KBASE_GPU_RESET_EN
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev);
+static void kbasep_reset_timeout_worker(struct work_struct *data);
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer);
+#endif /* KBASE_GPU_RESET_EN */
+
+static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
+						struct kbase_context *kctx)
+{
+	return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT));
+}
+
+static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
+				base_jd_core_req core_req,
+				int js)
+{
+	u64 affinity;
+
+	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+			BASE_JD_REQ_T) {
+		/* Tiler-only atom */
+		/* If the hardware supports XAFFINITY then we'll only enable
+		 * the tiler (which is the default so this is a no-op),
+		 * otherwise enable shader core 0.
+		 */
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			affinity = 1;
+		else
+			affinity = 0;
+	} else if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
+			BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+		unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+		struct mali_base_gpu_coherent_group_info *coherency_info =
+			&kbdev->gpu_props.props.coherency_info;
+
+		affinity = kbase_pm_ca_get_core_mask(kbdev) &
+				kbdev->pm.debug_core_mask[js];
+
+		/* JS2 on a dual core group system targets core group 1. All
+		 * other cases target core group 0.
+		 */
+		if (js == 2 && num_core_groups > 1)
+			affinity &= coherency_info->group[1].core_mask;
+		else
+			affinity &= coherency_info->group[0].core_mask;
+	} else {
+		/* Use all cores */
+		affinity = kbase_pm_ca_get_core_mask(kbdev) &
+				kbdev->pm.debug_core_mask[js];
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
+					affinity & 0xFFFFFFFF);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
+					affinity >> 32);
+
+	return affinity;
+}
+
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js)
+{
+	struct kbase_context *kctx;
+	u32 cfg;
+	u64 jc_head = katom->jc;
+	u64 affinity;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(katom);
+
+	kctx = katom->kctx;
+
+	/* Command register must be available */
+	KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
+						jc_head & 0xFFFFFFFF);
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
+						jc_head >> 32);
+
+	affinity = kbase_job_write_affinity(kbdev, katom->core_req, js);
+
+	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
+	 * start */
+	cfg = kctx->as_nr;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) &&
+			!(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
+		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
+
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START))
+		cfg |= JS_CONFIG_START_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
+
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) &&
+			!(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
+		cfg |= JS_CONFIG_END_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10649))
+		cfg |= JS_CONFIG_START_MMU;
+
+	cfg |= JS_CONFIG_THREAD_PRI(8);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) &&
+		(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED))
+		cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+		if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
+			cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								true;
+		} else {
+			katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
+			kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+								false;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT),
+				katom->flush_id);
+
+	/* Write an approximate start timestamp.
+	 * It's approximate because there might be a job in the HEAD register.
+	 */
+	katom->start_timestamp = ktime_get();
+
+	/* GO ! */
+	dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx",
+				katom, kctx, js, jc_head);
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
+							(u32)affinity);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(
+				GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js),
+				kctx, kbase_jd_atom_id(kctx, katom));
+#endif
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(katom, jc_head,
+			affinity, cfg);
+	KBASE_TLSTREAM_TL_RET_CTX_LPU(
+		kctx,
+		&kbdev->gpu_props.props.raw_props.js_features[
+			katom->slot_nr]);
+	KBASE_TLSTREAM_TL_RET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]);
+	KBASE_TLSTREAM_TL_RET_ATOM_LPU(
+			katom,
+			&kbdev->gpu_props.props.raw_props.js_features[js],
+			"ctx_nr,atom_nr");
+#ifdef CONFIG_GPU_TRACEPOINTS
+	if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
+		/* If this is the only job on the slot, trace it as starting */
+		char js_string[16];
+
+		trace_gpu_sched_switch(
+				kbasep_make_job_slot_string(js, js_string,
+						sizeof(js_string)),
+				ktime_to_ns(katom->start_timestamp),
+				(u32)katom->kctx->id, 0, katom->work_id);
+		kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
+	}
+#endif
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+						JS_COMMAND_START);
+}
+
+/**
+ * kbasep_job_slot_update_head_start_timestamp - Update timestamp
+ * @kbdev: kbase device
+ * @js: job slot
+ * @end_timestamp: timestamp
+ *
+ * Update the start_timestamp of the job currently in the HEAD, based on the
+ * fact that we got an IRQ for the previous set of completed jobs.
+ *
+ * The estimate also takes into account the time the job was submitted, to
+ * work out the best estimate (which might still result in an over-estimate to
+ * the calculated time spent)
+ */
+static void kbasep_job_slot_update_head_start_timestamp(
+						struct kbase_device *kbdev,
+						int js,
+						ktime_t end_timestamp)
+{
+	if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) {
+		struct kbase_jd_atom *katom;
+		ktime_t timestamp_diff;
+		/* The atom in the HEAD */
+		katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		KBASE_DEBUG_ASSERT(katom != NULL);
+
+		timestamp_diff = ktime_sub(end_timestamp,
+				katom->start_timestamp);
+		if (ktime_to_ns(timestamp_diff) >= 0) {
+			/* Only update the timestamp if it's a better estimate
+			 * than what's currently stored. This is because our
+			 * estimate that accounts for the throttle time may be
+			 * too much of an overestimate */
+			katom->start_timestamp = end_timestamp;
+		}
+	}
+}
+
+/**
+ * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline
+ * tracepoint
+ * @kbdev: kbase device
+ * @js: job slot
+ *
+ * Make a tracepoint call to the instrumentation module informing that
+ * softstop happened on given lpu (job slot).
+ */
+static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev,
+					int js)
+{
+	KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(
+		&kbdev->gpu_props.props.raw_props.js_features[js]);
+}
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done)
+{
+	unsigned long flags;
+	int i;
+	u32 count = 0;
+	ktime_t end_timestamp = ktime_get();
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	while (done) {
+		u32 failed = done >> 16;
+
+		/* treat failed slots as finished slots */
+		u32 finished = (done & 0xFFFF) | failed;
+
+		/* Note: This is inherently unfair, as we always check
+		 * for lower numbered interrupts before the higher
+		 * numbered ones.*/
+		i = ffs(finished) - 1;
+		KBASE_DEBUG_ASSERT(i >= 0);
+
+		do {
+			int nr_done;
+			u32 active;
+			u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */
+			u64 job_tail = 0;
+
+			if (failed & (1u << i)) {
+				/* read out the job slot status code if the job
+				 * slot reported failure */
+				completion_code = kbase_reg_read(kbdev,
+					JOB_SLOT_REG(i, JS_STATUS));
+
+				if (completion_code == BASE_JD_EVENT_STOPPED) {
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+					kbase_trace_mali_job_slots_event(
+						GATOR_MAKE_EVENT(
+						GATOR_JOB_SLOT_SOFT_STOPPED, i),
+								NULL, 0);
+#endif
+
+					kbasep_trace_tl_event_lpu_softstop(
+						kbdev, i);
+
+					/* Soft-stopped job - read the value of
+					 * JS<n>_TAIL so that the job chain can
+					 * be resumed */
+					job_tail = (u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_LO)) |
+						((u64)kbase_reg_read(kbdev,
+						JOB_SLOT_REG(i, JS_TAIL_HI))
+						 << 32);
+				} else if (completion_code ==
+						BASE_JD_EVENT_NOT_STARTED) {
+					/* PRLAM-10673 can cause a TERMINATED
+					 * job to come back as NOT_STARTED, but
+					 * the error interrupt helps us detect
+					 * it */
+					completion_code =
+						BASE_JD_EVENT_TERMINATED;
+				}
+
+				kbase_gpu_irq_evict(kbdev, i, completion_code);
+			}
+
+			kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+					done & ((1 << i) | (1 << (i + 16))));
+			active = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_JS_STATE));
+
+			if (((active >> i) & 1) == 0 &&
+					(((done >> (i + 16)) & 1) == 0)) {
+				/* There is a potential race we must work
+				 * around:
+				 *
+				 *  1. A job slot has a job in both current and
+				 *     next registers
+				 *  2. The job in current completes
+				 *     successfully, the IRQ handler reads
+				 *     RAWSTAT and calls this function with the
+				 *     relevant bit set in "done"
+				 *  3. The job in the next registers becomes the
+				 *     current job on the GPU
+				 *  4. Sometime before the JOB_IRQ_CLEAR line
+				 *     above the job on the GPU _fails_
+				 *  5. The IRQ_CLEAR clears the done bit but not
+				 *     the failed bit. This atomically sets
+				 *     JOB_IRQ_JS_STATE. However since both jobs
+				 *     have now completed the relevant bits for
+				 *     the slot are set to 0.
+				 *
+				 * If we now did nothing then we'd incorrectly
+				 * assume that _both_ jobs had completed
+				 * successfully (since we haven't yet observed
+				 * the fail bit being set in RAWSTAT).
+				 *
+				 * So at this point if there are no active jobs
+				 * left we check to see if RAWSTAT has a failure
+				 * bit set for the job slot. If it does we know
+				 * that there has been a new failure that we
+				 * didn't previously know about, so we make sure
+				 * that we record this in active (but we wait
+				 * for the next loop to deal with it).
+				 *
+				 * If we were handling a job failure (i.e. done
+				 * has the relevant high bit set) then we know
+				 * that the value read back from
+				 * JOB_IRQ_JS_STATE is the correct number of
+				 * remaining jobs because the failed job will
+				 * have prevented any futher jobs from starting
+				 * execution.
+				 */
+				u32 rawstat = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT));
+
+				if ((rawstat >> (i + 16)) & 1) {
+					/* There is a failed job that we've
+					 * missed - add it back to active */
+					active |= (1u << i);
+				}
+			}
+
+			dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n",
+							completion_code);
+
+			nr_done = kbase_backend_nr_atoms_submitted(kbdev, i);
+			nr_done -= (active >> i) & 1;
+			nr_done -= (active >> (i + 16)) & 1;
+
+			if (nr_done <= 0) {
+				dev_warn(kbdev->dev, "Spurious interrupt on slot %d",
+									i);
+
+				goto spurious;
+			}
+
+			count += nr_done;
+
+			while (nr_done) {
+				if (nr_done == 1) {
+					kbase_gpu_complete_hw(kbdev, i,
+								completion_code,
+								job_tail,
+								&end_timestamp);
+					kbase_jm_try_kick_all(kbdev);
+				} else {
+					/* More than one job has completed.
+					 * Since this is not the last job being
+					 * reported this time it must have
+					 * passed. This is because the hardware
+					 * will not allow further jobs in a job
+					 * slot to complete until the failed job
+					 * is cleared from the IRQ status.
+					 */
+					kbase_gpu_complete_hw(kbdev, i,
+							BASE_JD_EVENT_DONE,
+							0,
+							&end_timestamp);
+				}
+				nr_done--;
+			}
+ spurious:
+			done = kbase_reg_read(kbdev,
+					JOB_CONTROL_REG(JOB_IRQ_RAWSTAT));
+
+			if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) {
+				/* Workaround for missing interrupt caused by
+				 * PRLAM-10883 */
+				if (((active >> i) & 1) && (0 ==
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(i,
+							JS_STATUS)))) {
+					/* Force job slot to be processed again
+					 */
+					done |= (1u << i);
+				}
+			}
+
+			failed = done >> 16;
+			finished = (done & 0xFFFF) | failed;
+			if (done)
+				end_timestamp = ktime_get();
+		} while (finished & (1 << i));
+
+		kbasep_job_slot_update_head_start_timestamp(kbdev, i,
+								end_timestamp);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#if KBASE_GPU_RESET_EN
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_COMMITTED) {
+		/* If we're trying to reset the GPU then we might be able to do
+		 * it early (without waiting for a timeout) because some jobs
+		 * have completed
+		 */
+		kbasep_try_reset_gpu_early(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
+}
+KBASE_EXPORT_TEST_API(kbase_job_done);
+
+static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	bool soft_stops_allowed = true;
+
+	if (kbase_jd_katom_is_protected(katom)) {
+		soft_stops_allowed = false;
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
+		if ((katom->core_req & BASE_JD_REQ_T) != 0)
+			soft_stops_allowed = false;
+	}
+	return soft_stops_allowed;
+}
+
+static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev,
+						base_jd_core_req core_reqs)
+{
+	bool hard_stops_allowed = true;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) {
+		if ((core_reqs & BASE_JD_REQ_T) != 0)
+			hard_stops_allowed = false;
+	}
+	return hard_stops_allowed;
+}
+
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom)
+{
+#if KBASE_TRACE_ENABLE
+	u32 status_reg_before;
+	u64 job_in_head_before;
+	u32 status_reg_after;
+
+	KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK)));
+
+	/* Check the head pointer */
+	job_in_head_before = ((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_LO)))
+			| (((u64) kbase_reg_read(kbdev,
+					JOB_SLOT_REG(js, JS_HEAD_HI)))
+									<< 32);
+	status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS));
+#endif
+
+	if (action == JS_COMMAND_SOFT_STOP) {
+		bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev,
+								target_katom);
+
+		if (!soft_stop_allowed) {
+#ifdef CONFIG_MALI_DEBUG
+			dev_dbg(kbdev->dev,
+					"Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+
+		/* We are about to issue a soft stop, so mark the atom as having
+		 * been soft stopped */
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
+
+		/* Mark the point where we issue the soft-stop command */
+		KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(target_katom);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+			int i;
+
+			for (i = 0;
+			     i < kbase_backend_nr_atoms_submitted(kbdev, js);
+			     i++) {
+				struct kbase_jd_atom *katom;
+
+				katom = kbase_gpu_inspect(kbdev, js, i);
+
+				KBASE_DEBUG_ASSERT(katom);
+
+				/* For HW_ISSUE_8316, only 'bad' jobs attacking
+				 * the system can cause this issue: normally,
+				 * all memory should be allocated in multiples
+				 * of 4 pages, and growable memory should be
+				 * changed size in multiples of 4 pages.
+				 *
+				 * Whilst such 'bad' jobs can be cleared by a
+				 * GPU reset, the locking up of a uTLB entry
+				 * caused by the bad job could also stall other
+				 * ASs, meaning that other ASs' jobs don't
+				 * complete in the 'grace' period before the
+				 * reset. We don't want to lose other ASs' jobs
+				 * when they would normally complete fine, so we
+				 * must 'poke' the MMU regularly to help other
+				 * ASs complete */
+				kbase_as_poking_timer_retain_atom(
+						kbdev, katom->kctx, katom);
+			}
+		}
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_SOFT_STOP_1 :
+				JS_COMMAND_SOFT_STOP_0;
+		}
+	} else if (action == JS_COMMAND_HARD_STOP) {
+		bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev,
+								core_reqs);
+
+		if (!hard_stop_allowed) {
+			/* Jobs can be hard-stopped for the following reasons:
+			 *  * CFS decides the job has been running too long (and
+			 *    soft-stop has not occurred). In this case the GPU
+			 *    will be reset by CFS if the job remains on the
+			 *    GPU.
+			 *
+			 *  * The context is destroyed, kbase_jd_zap_context
+			 *    will attempt to hard-stop the job. However it also
+			 *    has a watchdog which will cause the GPU to be
+			 *    reset if the job remains on the GPU.
+			 *
+			 *  * An (unhandled) MMU fault occurred. As long as
+			 *    BASE_HW_ISSUE_8245 is defined then the GPU will be
+			 *    reset.
+			 *
+			 * All three cases result in the GPU being reset if the
+			 * hard-stop fails, so it is safe to just return and
+			 * ignore the hard-stop request.
+			 */
+			dev_warn(kbdev->dev,
+					"Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
+			return;
+		}
+		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_HARD_STOP_1 :
+				JS_COMMAND_HARD_STOP_0;
+		}
+	}
+
+	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action);
+
+#if KBASE_TRACE_ENABLE
+	status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS));
+	if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
+		struct kbase_jd_atom *head;
+		struct kbase_context *head_kctx;
+
+		head = kbase_gpu_inspect(kbdev, js, 0);
+		head_kctx = head->kctx;
+
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx,
+						head, job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+						0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx,
+							head, head->jc, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx,
+							head, head->jc, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	} else {
+		if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							job_in_head_before, js);
+		else
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+							0, js);
+
+		switch (action) {
+		case JS_COMMAND_SOFT_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_SOFT_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_SOFT_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0,
+							js);
+			break;
+		case JS_COMMAND_HARD_STOP_0:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL,
+							0, js);
+			break;
+		case JS_COMMAND_HARD_STOP_1:
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL,
+							0, js);
+			break;
+		default:
+			BUG();
+			break;
+		}
+	}
+#endif
+}
+
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Cancel any remaining running jobs for this kctx  */
+	mutex_lock(&kctx->jctx.lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Invalidate all jobs in context, to prevent re-submitting */
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		if (!work_pending(&kctx->jctx.atoms[i].work))
+			kctx->jctx.atoms[i].event_code =
+						BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_hardstop(kctx, i, NULL);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev;
+	int js = target_katom->slot_nr;
+	int priority = target_katom->sched_priority;
+	int i;
+	bool stop_sent = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		if ((kbdev->js_ctx_scheduling_mode ==
+			KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE) &&
+				(katom->kctx != kctx))
+			continue;
+
+		if (katom->sched_priority > priority) {
+			if (!stop_sent)
+				KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(
+						target_katom);
+
+			kbase_job_slot_softstop(kbdev, js, katom);
+			stop_sent = true;
+		}
+	}
+}
+
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	unsigned long timeout = msecs_to_jiffies(ZAP_TIMEOUT);
+
+	timeout = wait_event_timeout(kctx->jctx.zero_jobs_wait,
+			kctx->jctx.job_nr == 0, timeout);
+
+	if (timeout != 0)
+		timeout = wait_event_timeout(
+			kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			!kbase_ctx_flag(kctx, KCTX_SCHEDULED),
+			timeout);
+
+	/* Neither wait timed out; all done! */
+	if (timeout != 0)
+		goto exit;
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_prepare_to_reset_gpu(kbdev)) {
+		dev_err(kbdev->dev,
+			"Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+			ZAP_TIMEOUT);
+		kbase_reset_gpu(kbdev);
+	}
+
+	/* Wait for the reset to complete */
+	wait_event(kbdev->hwaccess.backend.reset_wait,
+			atomic_read(&kbdev->hwaccess.backend.reset_gpu)
+			== KBASE_RESET_GPU_NOT_PENDING);
+#else
+	dev_warn(kbdev->dev,
+		"Jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+		ZAP_TIMEOUT);
+
+#endif
+exit:
+	dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+
+	/* Ensure that the signallers of the waitqs have finished */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
+{
+	u32 flush_id = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
+		mutex_lock(&kbdev->pm.lock);
+		if (kbdev->pm.backend.gpu_powered)
+			flush_id = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(LATEST_FLUSH));
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return flush_id;
+}
+
+int kbase_job_slot_init(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
+						"Mali reset workqueue", 0, 1);
+	if (NULL == kbdev->hwaccess.backend.reset_workq)
+		return -EINVAL;
+
+	INIT_WORK(&kbdev->hwaccess.backend.reset_work,
+						kbasep_reset_timeout_worker);
+
+	hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->hwaccess.backend.reset_timer.function =
+						kbasep_reset_timer_callback;
+#endif
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_init);
+
+void kbase_job_slot_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_job_slot_term(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+	destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
+#endif
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_term);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot
+ * @kbdev: kbase device pointer
+ * @kctx:  context to check against
+ * @js:	   slot to check
+ * @target_katom: An atom to check, or NULL if all atoms from @kctx on
+ *                slot @js should be checked
+ *
+ * This checks are based upon parameters that would normally be passed to
+ * kbase_job_slot_hardstop().
+ *
+ * In the event of @target_katom being NULL, this will check the last jobs that
+ * are likely to be running on the slot to see if a) they belong to kctx, and
+ * so would be stopped, and b) whether they have AFBC
+ *
+ * In that case, It's guaranteed that a job currently executing on the HW with
+ * AFBC will be detected. However, this is a conservative check because it also
+ * detects jobs that have just completed too.
+ *
+ * Return: true when hard-stop _might_ stop an afbc atom, else false.
+ */
+static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev,
+		struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom)
+{
+	bool ret = false;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* When we have an atom the decision can be made straight away. */
+	if (target_katom)
+		return !!(target_katom->core_req & BASE_JD_REQ_FS_AFBC);
+
+	/* Otherwise, we must chweck the hardware to see if it has atoms from
+	 * this context with AFBC. */
+	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+		struct kbase_jd_atom *katom;
+
+		katom = kbase_gpu_inspect(kbdev, js, i);
+		if (!katom)
+			continue;
+
+		/* Ignore atoms from other contexts, they won't be stopped when
+		 * we use this for checking if we should hard-stop them */
+		if (katom->kctx != kctx)
+			continue;
+
+		/* An atom on this slot and this context: check for AFBC */
+		if (katom->core_req & BASE_JD_REQ_FS_AFBC) {
+			ret = true;
+			break;
+		}
+	}
+
+	return ret;
+}
+#endif /* KBASE_GPU_RESET_EN */
+
+/**
+ * kbase_job_slot_softstop_swflags - Soft-stop a job with flags
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * @sw_flags:      Flags to pass in about the soft-stop
+ *
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Soft-stop the specified job slot, with extra information about the stop
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+			struct kbase_jd_atom *target_katom, u32 sw_flags)
+{
+	KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
+	kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
+			JS_COMMAND_SOFT_STOP | sw_flags);
+}
+
+/**
+ * kbase_job_slot_softstop - Soft-stop the specified job slot
+ * @kbdev:         The kbase device
+ * @js:            The job slot to soft-stop
+ * @target_katom:  The job that should be soft-stopped (or NULL for any job)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ *   The job slot must not already be in the process of being soft-stopped.
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
+}
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool stopped;
+#if KBASE_GPU_RESET_EN
+	/* We make the check for AFBC before evicting/stopping atoms.  Note
+	 * that no other thread can modify the slots whilst we have the
+	 * hwaccess_lock. */
+	int needs_workaround_for_afbc =
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542)
+			&& kbasep_check_for_afbc_on_slot(kbdev, kctx, js,
+					 target_katom);
+#endif
+
+	stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
+							target_katom,
+							JS_COMMAND_HARD_STOP);
+#if KBASE_GPU_RESET_EN
+	if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) ||
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) ||
+			needs_workaround_for_afbc)) {
+		/* MIDBASE-2916 if a fragment job with AFBC encoding is
+		 * hardstopped, ensure to do a soft reset also in order to
+		 * clear the GPU status.
+		 * Workaround for HW issue 8401 has an issue,so after
+		 * hard-stopping just reset the GPU. This will ensure that the
+		 * jobs leave the GPU.*/
+		if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
+			dev_err(kbdev->dev, "Issueing GPU soft-reset after hard stopping due to hardware issue");
+			kbase_reset_gpu_locked(kbdev);
+		}
+	}
+#endif
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentiall enter disjoint mode
+ * @kbdev: kbase device
+ * @action: the event which has occurred
+ * @core_reqs: core requirements of the atom
+ * @target_katom: the atom which is being affected
+ *
+ * For a certain soft/hard-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period
+ *
+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context)
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete
+ */
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	/* For hard-stop, don't enter if hard-stop not allowed */
+	if (hw_action == JS_COMMAND_HARD_STOP &&
+			!kbasep_hard_stop_allowed(kbdev, core_reqs))
+		return;
+
+	/* For soft-stop, don't enter if soft-stop not allowed, or isn't
+	 * causing disjoint */
+	if (hw_action == JS_COMMAND_SOFT_STOP &&
+			!(kbasep_soft_stop_allowed(kbdev, target_katom) &&
+			  (action & JS_COMMAND_SW_CAUSES_DISJOINT)))
+		return;
+
+	/* Nothing to do if already logged disjoint state on this atom */
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT)
+		return;
+
+	target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
+	kbase_disjoint_state_up(kbdev);
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom)
+{
+	if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) {
+		target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT;
+		kbase_disjoint_state_down(kbdev);
+	}
+}
+
+
+#if KBASE_GPU_RESET_EN
+static void kbase_debug_dump_registers(struct kbase_device *kbdev)
+{
+	int i;
+
+	kbase_io_history_dump(kbdev);
+
+	dev_err(kbdev->dev, "Register state:");
+	dev_err(kbdev->dev, "  GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)));
+	dev_err(kbdev->dev, "  JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x",
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE)));
+	for (i = 0; i < 3; i++) {
+		dev_err(kbdev->dev, "  JS%d_STATUS=0x%08x      JS%d_HEAD_LO=0x%08x",
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS)),
+			i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO)));
+	}
+	dev_err(kbdev->dev, "  MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)));
+	dev_err(kbdev->dev, "  GPU_IRQ_MASK=0x%08x    JOB_IRQ_MASK=0x%08x     MMU_IRQ_MASK=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)),
+		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)),
+		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)));
+	dev_err(kbdev->dev, "  PWR_OVERRIDE0=0x%08x   PWR_OVERRIDE1=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1)));
+	dev_err(kbdev->dev, "  SHADER_CONFIG=0x%08x   L2_MMU_CONFIG=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)));
+	dev_err(kbdev->dev, "  TILER_CONFIG=0x%08x    JM_CONFIG=0x%08x",
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)),
+		kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG)));
+}
+
+static void kbasep_reset_timeout_worker(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	ktime_t end_timestamp = ktime_get();
+	struct kbasep_js_device_data *js_devdata;
+	bool try_schedule = false;
+	bool silent = false;
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+	KBASE_DEBUG_ASSERT(data);
+
+	kbdev = container_of(data, struct kbase_device,
+						hwaccess.backend.reset_work);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_SILENT)
+		silent = true;
+
+	KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
+
+	/* Suspend vinstr.
+	 * This call will block until vinstr is suspended. */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
+	/* Make sure the timer has completed - this cannot be done from
+	 * interrupt context, so this cannot be done within
+	 * kbasep_try_reset_gpu_early. */
+	hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
+
+	if (kbase_pm_context_active_handle_suspend(kbdev,
+				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		/* This would re-activate the GPU. Since it's already idle,
+		 * there's no need to reset it */
+		atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+		kbase_disjoint_state_down(kbdev);
+		wake_up(&kbdev->hwaccess.backend.reset_wait);
+		kbase_vinstr_resume(kbdev->vinstr_ctx);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
+
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	spin_lock(&kbdev->hwaccess_lock);
+	spin_lock(&kbdev->mmu_mask_change);
+	/* We're about to flush out the IRQs and their bottom half's */
+	kbdev->irq_reset_flush = true;
+
+	/* Disable IRQ to avoid IRQ handlers to kick in after releasing the
+	 * spinlock; this also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts_nolock(kbdev);
+
+	spin_unlock(&kbdev->mmu_mask_change);
+	spin_unlock(&kbdev->hwaccess_lock);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+	/* Ensure that any IRQ handlers have finished
+	 * Must be done without any locks IRQ handlers will take */
+	kbase_synchronize_irqs(kbdev);
+
+	/* Flush out any in-flight work items */
+	kbase_flush_mmu_wqs(kbdev);
+
+	/* The flush has completed so reset the active indicator */
+	kbdev->irq_reset_flush = false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) {
+		/* Ensure that L2 is not transitioning when we send the reset
+		 * command */
+		while (--max_loops && kbase_pm_get_trans_cores(kbdev,
+				KBASE_PM_CORE_L2))
+			;
+
+		WARN(!max_loops, "L2 power transition timed out while trying to reset\n");
+	}
+
+	mutex_lock(&kbdev->pm.lock);
+	/* We hold the pm lock, so there ought to be a current policy */
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+	/* All slot have been soft-stopped and we've waited
+	 * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we
+	 * assume that anything that is still left on the GPU is stuck there and
+	 * we'll kill it when we reset the GPU */
+
+	if (!silent)
+		dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
+								RESET_TIMEOUT);
+
+	/* Output the state of some interesting registers to help in the
+	 * debugging of GPU resets */
+	if (!silent)
+		kbase_debug_dump_registers(kbdev);
+
+	/* Complete any jobs that were still on the GPU */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->protected_mode = false;
+	kbase_backend_reset(kbdev, &end_timestamp);
+	kbase_pm_metrics_update(kbdev, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Reset the GPU */
+	kbase_pm_init_hw(kbdev, 0);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_ctx_sched_restore_all_as(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_pm_enable_interrupts(kbdev);
+
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+
+	kbase_disjoint_state_down(kbdev);
+
+	wake_up(&kbdev->hwaccess.backend.reset_wait);
+	if (!silent)
+		dev_err(kbdev->dev, "Reset complete");
+
+	if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending)
+		try_schedule = true;
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Find out what cores are required now */
+	kbase_pm_update_cores_state(kbdev);
+
+	/* Synchronously request and wait for those cores, because if
+	 * instrumentation is enabled it would need them immediately. */
+	kbase_pm_check_transitions_sync(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Try submitting some jobs to restart processing */
+	if (try_schedule) {
+		KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u,
+									0);
+		kbase_js_sched_all(kbdev);
+	}
+
+	/* Process any pending slot updates */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_backend_slot_update(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_context_idle(kbdev);
+
+	/* Release vinstr */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
+}
+
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev = container_of(timer, struct kbase_device,
+						hwaccess.backend.reset_timer);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Reset still pending? */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
+						KBASE_RESET_GPU_COMMITTED)
+		queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * If all jobs are evicted from the GPU then we can reset the GPU
+ * immediately instead of waiting for the timeout to elapse
+ */
+
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
+{
+	int i;
+	int pending_jobs = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Count the number of jobs */
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
+
+	if (pending_jobs > 0) {
+		/* There are still jobs on the GPU - wait */
+		return;
+	}
+
+	/* To prevent getting incorrect registers when dumping failed job,
+	 * skip early reset.
+	 */
+	if (kbdev->job_fault_debug != false)
+		return;
+
+	/* Check that the reset has been committed to (i.e. kbase_reset_gpu has
+	 * been called), and that no other thread beat this thread to starting
+	 * the reset */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
+						KBASE_RESET_GPU_COMMITTED) {
+		/* Reset has already occurred */
+		return;
+	}
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+					&kbdev->hwaccess.backend.reset_work);
+}
+
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_try_reset_gpu_early_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
+ * @kbdev: kbase device
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return:
+ *   The function returns a boolean which should be interpreted as follows:
+ *   true - Prepared for reset, kbase_reset_gpu_locked should be called.
+ *   false - Another thread is performing a reset, kbase_reset_gpu should
+ *   not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	int i;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_PREPARED) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return false;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_softstop(kbdev, i, NULL);
+
+	return true;
+}
+
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	bool ret;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_prepare_to_reset_gpu_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
+
+/*
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for
+ * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset
+ * has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu);
+
+void kbase_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occuring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early_locked(kbdev);
+}
+
+void kbase_reset_gpu_silent(struct kbase_device *kbdev)
+{
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_SILENT) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+			&kbdev->hwaccess.backend.reset_work);
+}
+
+bool kbase_reset_gpu_active(struct kbase_device *kbdev)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_NOT_PENDING)
+		return false;
+
+	return true;
+}
+#endif /* KBASE_GPU_RESET_EN */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
new file mode 100644
index 0000000000000..831491e956a13
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -0,0 +1,189 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Job Manager backend-specific low-level APIs.
+ */
+
+#ifndef _KBASE_JM_HWACCESS_H_
+#define _KBASE_JM_HWACCESS_H_
+
+#include <mali_kbase_hw.h>
+#include <mali_kbase_debug.h>
+#include <linux/atomic.h>
+
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/**
+ * kbase_job_submit_nolock() - Submit a job to a certain job-slot
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_submit_nolock(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js);
+
+/**
+ * kbase_job_done_slot() - Complete the head job on a particular job-slot
+ * @kbdev:		Device pointer
+ * @s:			Job slot
+ * @completion_code:	Completion code of job reported by GPU
+ * @job_tail:		Job tail address reported by GPU
+ * @end_timestamp:	Timestamp of job completion
+ */
+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
+					u64 job_tail, ktime_t *end_timestamp);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+static inline char *kbasep_make_job_slot_string(int js, char *js_string,
+						size_t js_size)
+{
+	snprintf(js_string, js_size, "job_slot_%i", js);
+	return js_string;
+}
+#endif
+
+/**
+ * kbase_job_hw_submit() - Submit a job to the GPU
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js);
+
+/**
+ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop
+ *						   on the specified atom
+ * @kbdev:		Device pointer
+ * @js:			Job slot to stop on
+ * @action:		The action to perform, either JSn_COMMAND_HARD_STOP or
+ *			JSn_COMMAND_SOFT_STOP
+ * @core_reqs:		Core requirements of atom to stop
+ * @target_katom:	Atom to stop
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job
+ *					 slot belonging to a given context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @katom:	Specific atom to stop. May be NULL
+ * @js:		Job slot to hard stop
+ * @action:	The action to perform, either JSn_COMMAND_HARD_STOP or
+ *		JSn_COMMAND_SOFT_STOP
+ *
+ * If no context is provided then all jobs on the slot will be soft or hard
+ * stopped.
+ *
+ * If a katom is provided then only that specific atom will be stopped. In this
+ * case the kctx parameter is ignored.
+ *
+ * Jobs that are on the slot but are not yet on the GPU will be unpulled and
+ * returned to the job scheduler.
+ *
+ * Return: true if an atom was stopped, false otherwise
+ */
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action);
+
+/**
+ * kbase_job_slot_init - Initialise job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_job_slot_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_halt - Halt the job slot framework
+ * @kbdev: Device pointer
+ *
+ * Should prevent any further job slot processing
+ */
+void kbase_job_slot_halt(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_term - Terminate job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver termination
+ */
+void kbase_job_slot_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_cacheclean - Cause a GPU cache clean & flush
+ * @kbdev: Device pointer
+ *
+ * Caller must not be in IRQ context
+ */
+void kbase_gpu_cacheclean(struct kbase_device *kbdev);
+
+static inline bool kbase_atom_needs_tiler(struct kbase_device *kbdev,
+		base_jd_core_req core_req)
+{
+	return core_req & BASE_JD_REQ_T;
+}
+
+static inline bool kbase_atom_needs_shaders(struct kbase_device *kbdev,
+		base_jd_core_req core_req)
+{
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+		return true;
+	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+			BASE_JD_REQ_T) {
+		/* Tiler only atom */
+		return false;
+	}
+
+	return true;
+}
+
+#endif /* _KBASE_JM_HWACCESS_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
new file mode 100644
index 0000000000000..79777b7874039
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
@@ -0,0 +1,1834 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_10969_workaround.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/* Return whether the specified ringbuffer is empty. HW access lock must be
+ * held */
+#define SLOT_RB_EMPTY(rb)   (rb->write_idx == rb->read_idx)
+/* Return number of atoms currently in the specified ringbuffer. HW access lock
+ * must be held */
+#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer
+ * @kbdev: Device pointer
+ * @katom: Atom to enqueue
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr];
+
+	WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom;
+	rb->write_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+/**
+ * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once
+ * it has been completed
+ * @kbdev:         Device pointer
+ * @js:            Job slot to remove atom from
+ * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in
+ *                 which case current time will be used.
+ *
+ * Context: Caller must hold the HW access lock
+ *
+ * Return: Atom removed from ringbuffer
+ */
+static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
+						int js,
+						ktime_t *end_timestamp)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+	struct kbase_jd_atom *katom;
+
+	if (SLOT_RB_EMPTY(rb)) {
+		WARN(1, "GPU ringbuffer unexpectedly empty\n");
+		return NULL;
+	}
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom;
+
+	kbase_gpu_release_atom(kbdev, katom, end_timestamp);
+
+	rb->read_idx++;
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB;
+
+	return katom;
+}
+
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if ((SLOT_RB_ENTRIES(rb) - 1) < idx)
+		return NULL; /* idx out of range */
+
+	return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js)
+{
+	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+	if (SLOT_RB_EMPTY(rb))
+		return NULL;
+
+	return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom;
+}
+
+/**
+ * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently
+ * on the GPU
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ *
+ * Return: true if there are atoms on the GPU for slot js,
+ *         false otherwise
+ */
+static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (!katom)
+			return false;
+		if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED ||
+				katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms
+ * currently on the GPU
+ * @kbdev:  Device pointer
+ *
+ * Return: true if there are any atoms on the GPU, false otherwise
+ */
+static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
+{
+	int js;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+			if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)
+				return true;
+		}
+	}
+	return false;
+}
+
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED))
+			nr++;
+	}
+
+	return nr;
+}
+
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		if (kbase_gpu_inspect(kbdev, js, i))
+			nr++;
+	}
+
+	return nr;
+}
+
+static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
+				enum kbase_atom_gpu_rb_state min_rb_state)
+{
+	int nr = 0;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < SLOT_RB_SIZE; i++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+		if (katom && (katom->gpu_rb_state >= min_rb_state))
+			nr++;
+	}
+
+	return nr;
+}
+
+/**
+ * check_secure_atom - Check if the given atom is in the given secure state and
+ *                     has a ringbuffer state of at least
+ *                     KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @katom:  Atom pointer
+ * @secure: Desired secure state
+ *
+ * Return: true if atom is in the given state, false otherwise
+ */
+static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure)
+{
+	if (katom->gpu_rb_state >=
+			KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION &&
+			((kbase_jd_katom_is_protected(katom) && secure) ||
+			(!kbase_jd_katom_is_protected(katom) && !secure)))
+		return true;
+
+	return false;
+}
+
+/**
+ * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given
+ *                                secure state in the ringbuffers of at least
+ *                                state
+ *                                KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE
+ * @kbdev:  Device pointer
+ * @secure: Desired secure state
+ *
+ * Return: true if any atoms are in the given state, false otherwise
+ */
+static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev,
+		bool secure)
+{
+	int js, i;
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (i = 0; i < SLOT_RB_SIZE; i++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+					js, i);
+
+			if (katom) {
+				if (check_secure_atom(katom, secure))
+					return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* The GPU is being reset - so prevent submission */
+		return 0;
+	}
+
+	return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js);
+}
+
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom);
+
+static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev,
+						int js,
+						struct kbase_jd_atom *katom)
+{
+	base_jd_core_req core_req = katom->core_req;
+
+	/* NOTE: The following uses a number of FALLTHROUGHs to optimize the
+	 * calls to this function. Ending of the function is indicated by BREAK
+	 * OUT.
+	 */
+	switch (katom->coreref_state) {
+		/* State when job is first attempted to be run */
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Request the cores */
+		kbase_pm_request_cores(kbdev,
+				kbase_atom_needs_tiler(kbdev, core_req),
+				kbase_atom_needs_shaders(kbdev, core_req));
+
+		/* Proceed to next state */
+		katom->coreref_state =
+		KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		{
+			bool cores_ready;
+
+			cores_ready = kbase_pm_cores_requested(kbdev,
+				kbase_atom_needs_tiler(kbdev, core_req),
+				kbase_atom_needs_shaders(kbdev,	core_req));
+
+			if (!cores_ready) {
+				/* Stay in this state and return, to retry at
+				 * this state later.
+				 */
+				KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+				JS_CORE_REF_REGISTER_INUSE_FAILED,
+						katom->kctx, katom,
+						katom->jc, js,
+						(u32) 0);
+				/* *** BREAK OUT: No state transition *** */
+				break;
+			}
+			/* Proceed to next state */
+			katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY;
+			/* *** BREAK OUT: Cores Ready *** */
+			break;
+		}
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+				"Unhandled kbase_atom_coreref_state %d",
+				katom->coreref_state);
+		break;
+	}
+
+	return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY);
+}
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	base_jd_core_req core_req = katom->core_req;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	switch (katom->coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested */
+		kbase_pm_release_cores(kbdev,
+				kbase_atom_needs_tiler(kbdev, core_req),
+				kbase_atom_needs_shaders(kbdev, core_req));
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							katom->coreref_state);
+		break;
+	}
+
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+}
+
+static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev,
+		base_jd_core_req core_req,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (coreref_state) {
+	case KBASE_ATOM_COREREF_STATE_READY:
+		/* State where atom was submitted to the HW - just proceed to
+		 * power-down */
+
+		/* *** FALLTHROUGH *** */
+
+	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+		/* State where cores were requested */
+		kbase_pm_release_cores(kbdev,
+				kbase_atom_needs_tiler(kbdev, core_req),
+				kbase_atom_needs_shaders(kbdev, core_req));
+		break;
+
+	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+		/* Initial state - nothing required */
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT_MSG(false,
+						"Unhandled coreref_state: %d",
+							coreref_state);
+		break;
+	}
+}
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom,
+					ktime_t *end_timestamp)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	switch (katom->gpu_rb_state) {
+	case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+		/* Should be impossible */
+		WARN(1, "Attempting to release atom not in ringbuffer\n");
+		break;
+
+	case KBASE_ATOM_GPU_RB_SUBMITTED:
+		/* Inform power management at start/finish of atom so it can
+		 * update its GPU utilisation metrics. Mark atom as not
+		 * submitted beforehand. */
+		katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+		kbase_pm_metrics_update(kbdev, end_timestamp);
+
+		if (katom->core_req & BASE_JD_REQ_PERMON)
+			kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+		KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom,
+			&kbdev->gpu_props.props.raw_props.js_features
+				[katom->slot_nr]);
+		KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]);
+		KBASE_TLSTREAM_TL_NRET_CTX_LPU(kctx,
+			&kbdev->gpu_props.props.raw_props.js_features
+				[katom->slot_nr]);
+
+	case KBASE_ATOM_GPU_RB_READY:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+		break;
+
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+		if (katom->protected_state.enter !=
+				KBASE_ATOM_ENTER_PROTECTED_CHECK ||
+				katom->protected_state.exit !=
+				KBASE_ATOM_EXIT_PROTECTED_CHECK)
+			kbdev->protected_mode_transition = false;
+
+		if (kbase_jd_katom_is_protected(katom) &&
+				((katom->protected_state.enter ==
+				KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) ||
+				 (katom->protected_state.enter ==
+				KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY) ||
+				 (katom->protected_state.enter ==
+				KBASE_ATOM_ENTER_PROTECTED_FINISHED))) {
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+		}
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) {
+			if (katom->atom_flags &
+					KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) {
+				kbdev->l2_users_count--;
+				katom->atom_flags &=
+					~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT;
+			}
+		}
+
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+	case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+		break;
+	}
+
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+	katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+}
+
+static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	kbase_gpu_release_atom(kbdev, katom, NULL);
+	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+}
+
+static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	bool slot_busy[3];
+
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		return true;
+	slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+	slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2,
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+
+	if ((js == 2 && !(slot_busy[0] || slot_busy[1])) ||
+		(js != 2 && !slot_busy[2]))
+		return true;
+
+	/* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */
+	if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1) ||
+			backend->rmu_workaround_flag))
+		return false;
+
+	/* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */
+	if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) ||
+			!backend->rmu_workaround_flag))
+		return false;
+
+	backend->rmu_workaround_flag = !backend->rmu_workaround_flag;
+
+	return true;
+}
+
+/**
+ * other_slots_busy - Determine if any job slots other than @js are currently
+ *                    running atoms
+ * @kbdev: Device pointer
+ * @js:    Job slot
+ *
+ * Return: true if any slots other than @js are busy, false otherwise
+ */
+static inline bool other_slots_busy(struct kbase_device *kbdev, int js)
+{
+	int slot;
+
+	for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) {
+		if (slot == js)
+			continue;
+
+		if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot,
+				KBASE_ATOM_GPU_RB_SUBMITTED))
+			return true;
+	}
+
+	return false;
+}
+
+static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev)
+{
+	return kbdev->protected_mode;
+}
+
+static void kbase_gpu_disable_coherent(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/*
+	 * When entering into protected mode, we must ensure that the
+	 * GPU is not operating in coherent mode as well. This is to
+	 * ensure that no protected memory can be leaked.
+	 */
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE);
+}
+
+static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev)
+{
+	int err = -EINVAL;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot enter protected mode: protected callbacks not specified.\n");
+
+	if (kbdev->protected_ops) {
+		/* Switch GPU to protected mode */
+		err = kbdev->protected_ops->protected_mode_enable(
+				kbdev->protected_dev);
+
+		if (err) {
+			dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n",
+					err);
+		} else {
+			kbdev->protected_mode = true;
+			kbase_ipa_protection_mode_switch_event(kbdev);
+		}
+	}
+
+	return err;
+}
+
+static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot exit protected mode: protected callbacks not specified.\n");
+
+	if (!kbdev->protected_ops)
+		return -EINVAL;
+
+	/* The protected mode disable callback will be called as part of reset
+	 */
+	kbase_reset_gpu_silent(kbdev);
+
+	return 0;
+}
+
+static int kbase_jm_protected_entry(struct kbase_device *kbdev,
+				struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+	err = kbase_gpu_protected_mode_enter(kbdev);
+
+	/*
+	 * Regardless of result before this call, we are no longer
+	 * transitioning the GPU.
+	 */
+
+	kbdev->protected_mode_transition = false;
+
+	KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev);
+	if (err) {
+		/*
+		 * Failed to switch into protected mode, resume
+		 * vinstr core and fail atom.
+		 */
+		kbase_vinstr_resume(kbdev->vinstr_ctx);
+		katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+		kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+		/*
+		 * Only return if head atom or previous atom
+		 * already removed - as atoms must be returned
+		 * in order.
+		 */
+		if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+		}
+
+		return -EINVAL;
+	}
+
+	/*
+	 * Protected mode sanity checks.
+	 */
+	KBASE_DEBUG_ASSERT_MSG(
+			kbase_jd_katom_is_protected(katom[idx]) ==
+			kbase_gpu_in_protected_mode(kbdev),
+			"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+			kbase_jd_katom_is_protected(katom[idx]),
+			kbase_gpu_in_protected_mode(kbdev));
+	katom[idx]->gpu_rb_state =
+			KBASE_ATOM_GPU_RB_READY;
+
+	return err;
+}
+
+static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+	switch (katom[idx]->protected_state.enter) {
+	case KBASE_ATOM_ENTER_PROTECTED_CHECK:
+		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev);
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitiong, and that
+		 * there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		kbdev->protected_mode_transition = true;
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_VINSTR;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_VINSTR:
+		if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) {
+			/*
+			 * We can't switch now because
+			 * the vinstr core state switch
+			 * is not done yet.
+			 */
+			return -EAGAIN;
+		}
+
+		/* Once reaching this point GPU must be
+		 * switched to protected mode or vinstr
+		 * re-enabled. */
+
+		/*
+		 * Not in correct mode, begin protected mode switch.
+		 * Entering protected mode requires us to power down the L2,
+		 * and drop out of fully coherent mode.
+		 */
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_IDLE_L2;
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2:
+		/* Avoid unnecessary waiting on non-ACE platforms. */
+		if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) {
+			if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+				/*
+				 * The L2 is still powered, wait for all the users to
+				 * finish with it before doing the actual reset.
+				 */
+				return -EAGAIN;
+			}
+		}
+
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY:
+		/*
+		 * When entering into protected mode, we must ensure that the
+		 * GPU is not operating in coherent mode as well. This is to
+		 * ensure that no protected memory can be leaked.
+		 */
+		kbase_gpu_disable_coherent(kbdev);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) {
+			/*
+			 * Power on L2 caches; this will also result in the
+			 * correct value written to coherency enable register.
+			 */
+			kbase_pm_request_l2_caches_nolock(kbdev);
+			/*
+			 * Set the flag on the atom that additional
+			 * L2 references are taken.
+			 */
+			katom[idx]->atom_flags |=
+					KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT;
+		}
+
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_FINISHED;
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234))
+			return -EAGAIN;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_ENTER_PROTECTED_FINISHED:
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) {
+			/*
+			 * Check that L2 caches are powered and, if so,
+			 * enter protected mode.
+			 */
+			if (kbdev->pm.backend.l2_powered != 0) {
+				/*
+				 * Remove additional L2 reference and reset
+				 * the atom flag which denotes it.
+				 */
+				if (katom[idx]->atom_flags &
+					KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) {
+					kbdev->l2_users_count--;
+					katom[idx]->atom_flags &=
+						~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT;
+				}
+
+				err = kbase_jm_protected_entry(kbdev, katom, idx, js);
+
+				if (err)
+					return err;
+			} else {
+				/*
+				 * still waiting for L2 caches to power up
+				 */
+				return -EAGAIN;
+			}
+		} else {
+			err = kbase_jm_protected_entry(kbdev, katom, idx, js);
+
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+
+	switch (katom[idx]->protected_state.exit) {
+	case KBASE_ATOM_EXIT_PROTECTED_CHECK:
+		KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev);
+		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
+		 * should ensure that we are not already transitiong, and that
+		 * there are no atoms currently on the GPU. */
+		WARN_ON(kbdev->protected_mode_transition);
+		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+
+		/*
+		 * Exiting protected mode requires a reset, but first the L2
+		 * needs to be powered down to ensure it's not active when the
+		 * reset is issued.
+		 */
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_IDLE_L2;
+
+		kbdev->protected_mode_transition = true;
+		kbase_pm_update_cores_state_nolock(kbdev);
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+	case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2:
+		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/*
+			 * The L2 is still powered, wait for all the users to
+			 * finish with it before doing the actual reset.
+			 */
+			return -EAGAIN;
+		}
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_RESET;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET:
+		/* Issue the reset to the GPU */
+		err = kbase_gpu_protected_mode_reset(kbdev);
+
+		if (err) {
+			kbdev->protected_mode_transition = false;
+
+			/* Failed to exit protected mode, fail atom */
+			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+			/* Only return if head atom or previous atom
+			 * already removed - as atoms must be returned
+			 * in order */
+			if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			}
+
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+			return -EINVAL;
+		}
+
+		katom[idx]->protected_state.exit =
+				KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT:
+		/* A GPU reset is issued when exiting protected mode. Once the
+		 * reset is done all atoms' state will also be reset. For this
+		 * reason, if the atom is still in this state we can safely
+		 * say that the reset has not completed i.e., we have not
+		 * finished exiting protected mode yet.
+		 */
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+void kbase_backend_slot_update(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		struct kbase_jd_atom *katom[2];
+		int idx;
+
+		katom[0] = kbase_gpu_inspect(kbdev, js, 0);
+		katom[1] = kbase_gpu_inspect(kbdev, js, 1);
+		WARN_ON(katom[1] && !katom[0]);
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			bool cores_ready;
+			int ret;
+
+			if (!katom[idx])
+				continue;
+
+			switch (katom[idx]->gpu_rb_state) {
+			case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+				/* Should be impossible */
+				WARN(1, "Attempting to update atom not in ringbuffer\n");
+				break;
+
+			case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+				if (katom[idx]->atom_flags &
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED)
+					break;
+
+				katom[idx]->gpu_rb_state =
+				KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
+				if (kbase_gpu_check_secure_atoms(kbdev,
+						!kbase_jd_katom_is_protected(
+						katom[idx])))
+					break;
+
+				if ((idx == 1) && (kbase_jd_katom_is_protected(
+								katom[0]) !=
+						kbase_jd_katom_is_protected(
+								katom[1])))
+					break;
+
+				if (kbdev->protected_mode_transition)
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
+
+				/*
+				 * Exiting protected mode must be done before
+				 * the references on the cores are taken as
+				 * a power down the L2 is required which
+				 * can't happen after the references for this
+				 * atom are taken.
+				 */
+
+				if (!kbase_gpu_in_protected_mode(kbdev) &&
+					kbase_jd_katom_is_protected(katom[idx])) {
+					/* Atom needs to transition into protected mode. */
+					ret = kbase_jm_enter_protected_mode(kbdev,
+							katom, idx, js);
+					if (ret)
+						break;
+				} else if (kbase_gpu_in_protected_mode(kbdev) &&
+					!kbase_jd_katom_is_protected(katom[idx])) {
+					/* Atom needs to transition out of protected mode. */
+					ret = kbase_jm_exit_protected_mode(kbdev,
+							katom, idx, js);
+					if (ret)
+						break;
+				}
+				katom[idx]->protected_state.exit =
+						KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+				/* Atom needs no protected mode transition. */
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+				if (katom[idx]->will_fail_event_code) {
+					kbase_gpu_mark_atom_for_return(kbdev,
+							katom[idx]);
+					/* Set EVENT_DONE so this atom will be
+					   completed, not unpulled. */
+					katom[idx]->event_code =
+						BASE_JD_EVENT_DONE;
+					/* Only return if head atom or previous
+					 * atom already removed - as atoms must
+					 * be returned in order. */
+					if (idx == 0 ||	katom[0]->gpu_rb_state ==
+							KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+						kbase_gpu_dequeue_atom(kbdev, js, NULL);
+						kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+					}
+					break;
+				}
+
+				cores_ready =
+					kbasep_js_job_check_ref_cores(kbdev, js,
+								katom[idx]);
+
+				if (katom[idx]->event_code ==
+						BASE_JD_EVENT_PM_EVENT) {
+					katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+					break;
+				}
+
+				if (!cores_ready)
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_AFFINITY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+				if (!kbase_gpu_rmu_workaround(kbdev, js))
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_READY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_READY:
+
+				if (idx == 1) {
+					/* Only submit if head atom or previous
+					 * atom already submitted */
+					if ((katom[0]->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED &&
+						katom[0]->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+						break;
+
+					/* If intra-slot serialization in use
+					 * then don't submit atom to NEXT slot
+					 */
+					if (kbdev->serialize_jobs &
+						KBASE_SERIALIZE_INTRA_SLOT)
+						break;
+				}
+
+				/* If inter-slot serialization in use then don't
+				 * submit atom if any other slots are in use */
+				if ((kbdev->serialize_jobs &
+						KBASE_SERIALIZE_INTER_SLOT) &&
+						other_slots_busy(kbdev, js))
+					break;
+
+				if ((kbdev->serialize_jobs &
+						KBASE_SERIALIZE_RESET) &&
+						kbase_reset_gpu_active(kbdev))
+					break;
+
+				/* Check if this job needs the cycle counter
+				 * enabled before submission */
+				if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
+					kbase_pm_request_gpu_cycle_counter_l2_is_on(
+									kbdev);
+
+				kbase_job_hw_submit(kbdev, katom[idx], js);
+				katom[idx]->gpu_rb_state =
+						KBASE_ATOM_GPU_RB_SUBMITTED;
+
+				/* Inform power management at start/finish of
+				 * atom so it can update its GPU utilisation
+				 * metrics. */
+				kbase_pm_metrics_update(kbdev,
+						&katom[idx]->start_timestamp);
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_SUBMITTED:
+				/* Atom submitted to HW, nothing else to do */
+				break;
+
+			case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+				/* Only return if head atom or previous atom
+				 * already removed - as atoms must be returned
+				 * in order */
+				if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					kbase_gpu_dequeue_atom(kbdev, js, NULL);
+					kbase_jm_return_atom_to_js(kbdev,
+								katom[idx]);
+				}
+				break;
+			}
+		}
+	}
+
+	/* Warn if PRLAM-8987 affinity restrictions are violated */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+		WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) ||
+			kbase_gpu_atoms_submitted(kbdev, 1)) &&
+			kbase_gpu_atoms_submitted(kbdev, 2));
+}
+
+
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	kbase_gpu_enqueue_atom(kbdev, katom);
+	kbase_backend_slot_update(kbdev);
+}
+
+#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \
+	(KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER))
+
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
+				u32 completion_code)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_atom *next_katom;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom = kbase_gpu_inspect(kbdev, js, 0);
+	next_katom = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (next_katom && katom->kctx == next_katom->kctx &&
+		next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
+		(HAS_DEP(next_katom) || next_katom->sched_priority ==
+				katom->sched_priority) &&
+		(kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO))
+									!= 0 ||
+		kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI))
+									!= 0)) {
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+				JS_COMMAND_NOP);
+		next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+
+		if (completion_code == BASE_JD_EVENT_STOPPED) {
+			KBASE_TLSTREAM_TL_NRET_ATOM_LPU(next_katom,
+				&kbdev->gpu_props.props.raw_props.js_features
+					[next_katom->slot_nr]);
+			KBASE_TLSTREAM_TL_NRET_ATOM_AS(next_katom, &kbdev->as
+					[next_katom->kctx->as_nr]);
+			KBASE_TLSTREAM_TL_NRET_CTX_LPU(next_katom->kctx,
+				&kbdev->gpu_props.props.raw_props.js_features
+					[next_katom->slot_nr]);
+		}
+
+		if (next_katom->core_req & BASE_JD_REQ_PERMON)
+			kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+
+		return true;
+	}
+
+	return false;
+}
+
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp)
+{
+	struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+	struct kbase_context *kctx = katom->kctx;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/*
+	 * When a hard-stop is followed close after a soft-stop, the completion
+	 * code may be set to STOPPED, even though the job is terminated
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8438)) {
+		if (completion_code == BASE_JD_EVENT_STOPPED &&
+				(katom->atom_flags &
+				KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) {
+			completion_code = BASE_JD_EVENT_TERMINATED;
+		}
+	}
+
+	if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) || (katom->core_req &
+					BASE_JD_REQ_SKIP_CACHE_END)) &&
+			completion_code != BASE_JD_EVENT_DONE &&
+			!(completion_code & BASE_JD_SW_EVENT)) {
+		/* When a job chain fails, on a T60x or when
+		 * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not
+		 * flushed. To prevent future evictions causing possible memory
+		 * corruption we need to flush the cache manually before any
+		 * affected memory gets reused. */
+		katom->need_cache_flush_cores_retained = true;
+		kbase_pm_request_cores(kbdev,
+				kbase_atom_needs_tiler(kbdev, katom->core_req),
+				kbase_atom_needs_shaders(kbdev,
+						katom->core_req));
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
+		if (kbdev->gpu_props.num_core_groups > 1 &&
+				katom->device_nr >= 1) {
+			dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
+			katom->need_cache_flush_cores_retained = true;
+			kbase_pm_request_cores(kbdev,
+				kbase_atom_needs_tiler(kbdev, katom->core_req),
+				kbase_atom_needs_shaders(kbdev,
+						katom->core_req));
+		}
+	}
+
+	katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+
+	if (completion_code == BASE_JD_EVENT_STOPPED) {
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		/*
+		 * Dequeue next atom from ringbuffers on same slot if required.
+		 * This atom will already have been removed from the NEXT
+		 * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that
+		 * the atoms on this slot are returned in the correct order.
+		 */
+		if (next_katom && katom->kctx == next_katom->kctx &&
+				next_katom->sched_priority ==
+				katom->sched_priority) {
+			WARN_ON(next_katom->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_SUBMITTED);
+			kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
+			kbase_jm_return_atom_to_js(kbdev, next_katom);
+		}
+	} else if (completion_code != BASE_JD_EVENT_DONE) {
+		struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+		int i;
+
+		if (!kbase_ctx_flag(katom->kctx, KCTX_DYING))
+			dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
+					js, completion_code,
+					kbase_exception_name
+					(kbdev,
+					completion_code));
+
+#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0
+		KBASE_TRACE_DUMP(kbdev);
+#endif
+		kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+		/*
+		 * Remove all atoms on the same context from ringbuffers. This
+		 * will not remove atoms that are already on the GPU, as these
+		 * are guaranteed not to have fail dependencies on the failed
+		 * atom.
+		 */
+		for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
+			struct kbase_jd_atom *katom_idx0 =
+						kbase_gpu_inspect(kbdev, i, 0);
+			struct kbase_jd_atom *katom_idx1 =
+						kbase_gpu_inspect(kbdev, i, 1);
+
+			if (katom_idx0 && katom_idx0->kctx == katom->kctx &&
+					HAS_DEP(katom_idx0) &&
+					katom_idx0->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Dequeue katom_idx0 from ringbuffer */
+				kbase_gpu_dequeue_atom(kbdev, i, end_timestamp);
+
+				if (katom_idx1 &&
+						katom_idx1->kctx == katom->kctx
+						&& HAS_DEP(katom_idx1) &&
+						katom_idx0->gpu_rb_state !=
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+					/* Dequeue katom_idx1 from ringbuffer */
+					kbase_gpu_dequeue_atom(kbdev, i,
+							end_timestamp);
+
+					katom_idx1->event_code =
+							BASE_JD_EVENT_STOPPED;
+					kbase_jm_return_atom_to_js(kbdev,
+								katom_idx1);
+				}
+				katom_idx0->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+
+			} else if (katom_idx1 &&
+					katom_idx1->kctx == katom->kctx &&
+					HAS_DEP(katom_idx1) &&
+					katom_idx1->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* Can not dequeue this atom yet - will be
+				 * dequeued when atom at idx0 completes */
+				katom_idx1->event_code = BASE_JD_EVENT_STOPPED;
+				kbase_gpu_mark_atom_for_return(kbdev,
+								katom_idx1);
+			}
+		}
+	}
+
+	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc,
+					js, completion_code);
+
+	if (job_tail != 0 && job_tail != katom->jc) {
+		bool was_updated = (job_tail != katom->jc);
+
+		/* Some of the job has been executed, so we update the job chain
+		 * address to where we should resume from */
+		katom->jc = job_tail;
+		if (was_updated)
+			KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx,
+						katom, job_tail, js);
+	}
+
+	/* Only update the event code for jobs that weren't cancelled */
+	if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
+		katom->event_code = (base_jd_event_code)completion_code;
+
+	/* Complete the job, and start new ones
+	 *
+	 * Also defer remaining work onto the workqueue:
+	 * - Re-queue Soft-stopped jobs
+	 * - For any other jobs, queue the job back into the dependency system
+	 * - Schedule out the parent context if necessary, and schedule a new
+	 *   one in.
+	 */
+#ifdef CONFIG_GPU_TRACEPOINTS
+	{
+		/* The atom in the HEAD */
+		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+									0);
+
+		if (next_katom && next_katom->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+							js_string,
+							sizeof(js_string)),
+						ktime_to_ns(*end_timestamp),
+						(u32)next_katom->kctx->id, 0,
+						next_katom->work_id);
+			kbdev->hwaccess.backend.slot_rb[js].last_context =
+							next_katom->kctx;
+		} else {
+			char js_string[16];
+
+			trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+							js_string,
+							sizeof(js_string)),
+						ktime_to_ns(ktime_get()), 0, 0,
+						0);
+			kbdev->hwaccess.backend.slot_rb[js].last_context = 0;
+		}
+	}
+#endif
+
+	if (kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)
+		kbase_reset_gpu_silent(kbdev);
+
+	if (completion_code == BASE_JD_EVENT_STOPPED)
+		katom = kbase_jm_return_atom_to_js(kbdev, katom);
+	else
+		katom = kbase_jm_complete(kbdev, katom, end_timestamp);
+
+	if (katom) {
+		/* Cross-slot dependency has now become runnable. Try to submit
+		 * it. */
+
+		/* Check if there are lower priority jobs to soft stop */
+		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+		kbase_jm_try_kick(kbdev, 1 << katom->slot_nr);
+	}
+
+	/* Job completion may have unblocked other atoms. Try to update all job
+	 * slots */
+	kbase_backend_slot_update(kbdev);
+}
+
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Reset should always take the GPU out of protected mode */
+	WARN_ON(kbase_gpu_in_protected_mode(kbdev));
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int atom_idx = 0;
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+					js, atom_idx);
+			bool keep_in_jm_rb = false;
+
+			if (!katom)
+				break;
+			if (katom->protected_state.exit ==
+			    KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) {
+				KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev);
+
+				kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+				/* protected mode sanity checks */
+				KBASE_DEBUG_ASSERT_MSG(
+					kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev),
+					"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+					kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev));
+				KBASE_DEBUG_ASSERT_MSG(
+					(kbase_jd_katom_is_protected(katom) && js == 0) ||
+					!kbase_jd_katom_is_protected(katom),
+					"Protected atom on JS%d not supported", js);
+			}
+			if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED)
+				keep_in_jm_rb = true;
+
+			kbase_gpu_release_atom(kbdev, katom, NULL);
+
+			/*
+			 * If the atom wasn't on HW when the reset was issued
+			 * then leave it in the RB and next time we're kicked
+			 * it will be processed again from the starting state.
+			 */
+			if (keep_in_jm_rb) {
+				kbasep_js_job_check_deref_cores(kbdev, katom);
+				katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+				katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+				/* As the atom was not removed, increment the
+				 * index so that we read the correct atom in the
+				 * next iteration. */
+				atom_idx++;
+				continue;
+			}
+
+			/*
+			 * The atom was on the HW when the reset was issued
+			 * all we can do is fail the atom.
+			 */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			kbase_jm_complete(kbdev, katom, end_timestamp);
+		}
+	}
+
+	kbdev->protected_mode_transition = false;
+}
+
+static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	u32 hw_action = action & JS_COMMAND_MASK;
+
+	kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom);
+	kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action,
+							katom->core_req, katom);
+	katom->kctx->blocked_js[js][katom->sched_priority] = true;
+}
+
+static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom,
+						u32 action,
+						bool disjoint)
+{
+	katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+	kbase_gpu_mark_atom_for_return(kbdev, katom);
+	katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true;
+
+	if (disjoint)
+		kbase_job_check_enter_disjoint(kbdev, action, katom->core_req,
+									katom);
+}
+
+static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
+{
+	if (katom->x_post_dep) {
+		struct kbase_jd_atom *dep_atom = katom->x_post_dep;
+
+		if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB &&
+			dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_RETURN_TO_JS)
+			return dep_atom->slot_nr;
+	}
+	return -1;
+}
+
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action)
+{
+	struct kbase_jd_atom *katom_idx0;
+	struct kbase_jd_atom *katom_idx1;
+
+	bool katom_idx0_valid, katom_idx1_valid;
+
+	bool ret = false;
+
+	int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1;
+	int prio_idx0 = 0, prio_idx1 = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	katom_idx0 = kbase_gpu_inspect(kbdev, js, 0);
+	katom_idx1 = kbase_gpu_inspect(kbdev, js, 1);
+
+	if (katom_idx0)
+		prio_idx0 = katom_idx0->sched_priority;
+	if (katom_idx1)
+		prio_idx1 = katom_idx1->sched_priority;
+
+	if (katom) {
+		katom_idx0_valid = (katom_idx0 == katom);
+		/* If idx0 is to be removed and idx1 is on the same context,
+		 * then idx1 must also be removed otherwise the atoms might be
+		 * returned out of order */
+		if (katom_idx1)
+			katom_idx1_valid = (katom_idx1 == katom) ||
+						(katom_idx0_valid &&
+							(katom_idx0->kctx ==
+							katom_idx1->kctx));
+		else
+			katom_idx1_valid = false;
+	} else {
+		katom_idx0_valid = (katom_idx0 &&
+				(!kctx || katom_idx0->kctx == kctx));
+		katom_idx1_valid = (katom_idx1 &&
+				(!kctx || katom_idx1->kctx == kctx) &&
+				prio_idx0 == prio_idx1);
+	}
+
+	if (katom_idx0_valid)
+		stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0);
+	if (katom_idx1_valid)
+		stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1);
+
+	if (katom_idx0_valid) {
+		if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Simple case - just dequeue and return */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			if (katom_idx1_valid) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				katom_idx1->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+				kbase_jm_return_atom_to_js(kbdev, katom_idx1);
+				katom_idx1->kctx->blocked_js[js][prio_idx1] =
+						true;
+			}
+
+			katom_idx0->event_code =
+						BASE_JD_EVENT_REMOVED_FROM_NEXT;
+			kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+			katom_idx0->kctx->blocked_js[js][prio_idx0] = true;
+		} else {
+			/* katom_idx0 is on GPU */
+			if (katom_idx1_valid && katom_idx1->gpu_rb_state ==
+						KBASE_ATOM_GPU_RB_SUBMITTED) {
+				/* katom_idx0 and katom_idx1 are on GPU */
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT)) == 0) {
+					/* idx0 has already completed - stop
+					 * idx1 if needed*/
+					if (katom_idx1_valid) {
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				} else {
+					/* idx1 is in NEXT registers - attempt
+					 * to remove */
+					kbase_reg_write(kbdev,
+							JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP);
+
+					if (kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_LO))
+									!= 0 ||
+						kbase_reg_read(kbdev,
+							JOB_SLOT_REG(js,
+							JS_HEAD_NEXT_HI))
+									!= 0) {
+						/* idx1 removed successfully,
+						 * will be handled in IRQ */
+						kbase_gpu_remove_atom(kbdev,
+								katom_idx1,
+								action, true);
+						stop_x_dep_idx1 =
+					should_stop_x_dep_slot(katom_idx1);
+
+						/* stop idx0 if still on GPU */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx0,
+								action);
+						ret = true;
+					} else if (katom_idx1_valid) {
+						/* idx0 has already completed,
+						 * stop idx1 if needed */
+						kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+						ret = true;
+					}
+				}
+			} else if (katom_idx1_valid) {
+				/* idx1 not on GPU but must be dequeued*/
+
+				/* idx1 will be handled in IRQ */
+				kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+				/* stop idx0 */
+				/* This will be repeated for anything removed
+				 * from the next registers, since their normal
+				 * flow was also interrupted, and this function
+				 * might not enter disjoint state e.g. if we
+				 * don't actually do a hard stop on the head
+				 * atom */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			} else {
+				/* no atom in idx1 */
+				/* just stop idx0 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+									action);
+				ret = true;
+			}
+		}
+	} else if (katom_idx1_valid) {
+		if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+			/* Mark for return */
+			/* idx1 will be returned once idx0 completes */
+			kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+									false);
+		} else {
+			/* idx1 is on GPU */
+			if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_COMMAND_NEXT)) == 0) {
+				/* idx0 has already completed - stop idx1 */
+				kbase_gpu_stop_atom(kbdev, js, katom_idx1,
+									action);
+				ret = true;
+			} else {
+				/* idx1 is in NEXT registers - attempt to
+				 * remove */
+				kbase_reg_write(kbdev, JOB_SLOT_REG(js,
+							JS_COMMAND_NEXT),
+							JS_COMMAND_NOP);
+
+				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_LO)) != 0 ||
+				    kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+						JS_HEAD_NEXT_HI)) != 0) {
+					/* idx1 removed successfully, will be
+					 * handled in IRQ once idx0 completes */
+					kbase_gpu_remove_atom(kbdev, katom_idx1,
+									action,
+									false);
+				} else {
+					/* idx0 has already completed - stop
+					 * idx1 */
+					kbase_gpu_stop_atom(kbdev, js,
+								katom_idx1,
+								action);
+					ret = true;
+				}
+			}
+		}
+	}
+
+
+	if (stop_x_dep_idx0 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0,
+								NULL, action);
+
+	if (stop_x_dep_idx1 != -1)
+		kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1,
+								NULL, action);
+
+	return ret;
+}
+
+void kbase_gpu_cacheclean(struct kbase_device *kbdev)
+{
+	/* Limit the number of loops to avoid a hang if the interrupt is missed
+	 */
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+	mutex_lock(&kbdev->cacheclean_lock);
+
+	/* use GPU_COMMAND completion solution */
+	/* clean & invalidate the caches */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES);
+
+	/* wait for cache flush to complete before continuing */
+	while (--max_loops &&
+		(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) &
+						CLEAN_CACHES_COMPLETED) == 0)
+		;
+
+	/* clear the CLEAN_CACHES_COMPLETED irq */
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
+							CLEAN_CACHES_COMPLETED);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+						CLEAN_CACHES_COMPLETED);
+	KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
+						KBASE_INSTR_STATE_CLEANING,
+	    "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
+
+	mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_backend_cacheclean(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->need_cache_flush_cores_retained) {
+		unsigned long flags;
+
+		kbase_gpu_cacheclean(kbdev);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_pm_release_cores(kbdev,
+				kbase_atom_needs_tiler(kbdev, katom->core_req),
+				kbase_atom_needs_shaders(kbdev,
+						katom->core_req));
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		katom->need_cache_flush_cores_retained = false;
+	}
+}
+
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+						struct kbase_jd_atom *katom)
+{
+	/*
+	 * If cache flush required due to HW workaround then perform the flush
+	 * now
+	 */
+	kbase_backend_cacheclean(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969)            &&
+	    (katom->core_req & BASE_JD_REQ_FS)                        &&
+	    katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT       &&
+	    (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) &&
+	    !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) {
+		dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n");
+		if (kbasep_10969_workaround_clamp_coordinates(katom)) {
+			/* The job had a TILE_RANGE_FAULT after was soft-stopped
+			 * Due to an HW issue we try to execute the job again.
+			 */
+			dev_dbg(kbdev->dev,
+				"Clamping has been executed, try to rerun the job\n"
+			);
+			katom->event_code = BASE_JD_EVENT_STOPPED;
+			katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
+		}
+	}
+
+	/* Clear the coreref_state now - while check_deref_cores() may not have
+	 * been called yet, the caller will have taken a copy of this field. If
+	 * this is not done, then if the atom is re-scheduled (following a soft
+	 * stop) then the core reference would not be retaken. */
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+}
+
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req,
+		enum kbase_atom_coreref_state coreref_state)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, coreref_state);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (!kbdev->pm.active_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		mutex_lock(&kbdev->pm.lock);
+		kbase_pm_update_active(kbdev);
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+void kbase_gpu_dump_slots(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	int js;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
+
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		int idx;
+
+		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+									js,
+									idx);
+
+			if (katom)
+				dev_info(kbdev->dev,
+				"  js%d idx%d : katom=%p gpu_rb_state=%d\n",
+				js, idx, katom, katom->gpu_rb_state);
+			else
+				dev_info(kbdev->dev, "  js%d idx%d : empty\n",
+								js, idx);
+		}
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
new file mode 100644
index 0000000000000..c3b9f2d855369
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
@@ -0,0 +1,83 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_H_
+#define _KBASE_HWACCESS_GPU_H_
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/**
+ * kbase_gpu_irq_evict - Evict an atom from a NEXT slot
+ *
+ * @kbdev:           Device pointer
+ * @js:              Job slot to evict from
+ * @completion_code: Event code from job that was run.
+ *
+ * Evict the atom in the NEXT slot for the specified job slot. This function is
+ * called from the job complete IRQ handler when the previous job has failed.
+ *
+ * Return: true if job evicted from NEXT registers, false otherwise
+ */
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js,
+				u32 completion_code);
+
+/**
+ * kbase_gpu_complete_hw - Complete an atom on job slot js
+ *
+ * @kbdev:           Device pointer
+ * @js:              Job slot that has completed
+ * @completion_code: Event code from job that has completed
+ * @job_tail:        The tail address from the hardware if the job has partially
+ *                   completed
+ * @end_timestamp:   Time of completion
+ */
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+				u32 completion_code,
+				u64 job_tail,
+				ktime_t *end_timestamp);
+
+/**
+ * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer
+ *
+ * @kbdev:  Device pointer
+ * @js:     Job slot to inspect
+ * @idx:    Index into ringbuffer. 0 is the job currently running on
+ *          the slot, 1 is the job waiting, all other values are invalid.
+ * Return:  The atom at that position in the ringbuffer
+ *          or NULL if no atom present
+ */
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+					int idx);
+
+/**
+ * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ */
+void kbase_gpu_dump_slots(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_GPU_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
new file mode 100644
index 0000000000000..df2dd5ec0526b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
@@ -0,0 +1,354 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+
+/*
+ * Hold the runpool_mutex for this
+ */
+static inline bool timer_callback_should_run(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	s8 nr_running_ctxs;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
+
+	/* Timer must stop if we are suspending */
+	if (backend->suspend_timer)
+		return false;
+
+	/* nr_contexts_pullable is updated with the runpool_mutex. However, the
+	 * locking in the caller gives us a barrier that ensures
+	 * nr_contexts_pullable is up-to-date for reading */
+	nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
+
+#ifdef CONFIG_MALI_DEBUG
+	if (kbdev->js_data.softstop_always) {
+		/* Debug support for allowing soft-stop on a single context */
+		return true;
+	}
+#endif				/* CONFIG_MALI_DEBUG */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
+		/* Timeouts would have to be 4x longer (due to micro-
+		 * architectural design) to support OpenCL conformance tests, so
+		 * only run the timer when there's:
+		 * - 2 or more CL contexts
+		 * - 1 or more GLES contexts
+		 *
+		 * NOTE: We will treat a context that has both Compute and Non-
+		 * Compute jobs will be treated as an OpenCL context (hence, we
+		 * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
+		 */
+		{
+			s8 nr_compute_ctxs =
+				kbasep_js_ctx_attr_count_on_runpool(kbdev,
+						KBASEP_JS_CTX_ATTR_COMPUTE);
+			s8 nr_noncompute_ctxs = nr_running_ctxs -
+							nr_compute_ctxs;
+
+			return (bool) (nr_compute_ctxs >= 2 ||
+							nr_noncompute_ctxs > 0);
+		}
+	} else {
+		/* Run the timer callback whenever you have at least 1 context
+		 */
+		return (bool) (nr_running_ctxs > 0);
+	}
+}
+
+static enum hrtimer_restart timer_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_backend_data *backend;
+	int s;
+	bool reset_needed = false;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	backend = container_of(timer, struct kbase_backend_data,
+							scheduling_timer);
+	kbdev = container_of(backend, struct kbase_device, hwaccess.backend);
+	js_devdata = &kbdev->js_data;
+
+	/* Loop through the slots */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
+		struct kbase_jd_atom *atom = NULL;
+
+		if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) {
+			atom = kbase_gpu_inspect(kbdev, s, 0);
+			KBASE_DEBUG_ASSERT(atom != NULL);
+		}
+
+		if (atom != NULL) {
+			/* The current version of the model doesn't support
+			 * Soft-Stop */
+			if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
+				u32 ticks = atom->ticks++;
+
+#ifndef CONFIG_MALI_JOB_DUMP
+				u32 soft_stop_ticks, hard_stop_ticks,
+								gpu_reset_ticks;
+				if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks_cl;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_cl;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_cl;
+				} else {
+					soft_stop_ticks =
+						js_devdata->soft_stop_ticks;
+					hard_stop_ticks =
+						js_devdata->hard_stop_ticks_ss;
+					gpu_reset_ticks =
+						js_devdata->gpu_reset_ticks_ss;
+				}
+
+				/* If timeouts have been changed then ensure
+				 * that atom tick count is not greater than the
+				 * new soft_stop timeout. This ensures that
+				 * atoms do not miss any of the timeouts due to
+				 * races between this worker and the thread
+				 * changing the timeouts. */
+				if (backend->timeouts_updated &&
+						ticks > soft_stop_ticks)
+					ticks = atom->ticks = soft_stop_ticks;
+
+				/* Job is Soft-Stoppable */
+				if (ticks == soft_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks ticks.
+					 * Soft stop the slot so we can run
+					 * other jobs.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+					int disjoint_threshold =
+		KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
+					u32 softstop_flags = 0u;
+
+					dev_dbg(kbdev->dev, "Soft-stop");
+					/* nr_user_contexts_running is updated
+					 * with the runpool_mutex, but we can't
+					 * take that here.
+					 *
+					 * However, if it's about to be
+					 * increased then the new context can't
+					 * run any jobs until they take the
+					 * hwaccess_lock, so it's OK to observe
+					 * the older value.
+					 *
+					 * Similarly, if it's about to be
+					 * decreased, the last job from another
+					 * context has already finished, so it's
+					 * not too bad that we observe the older
+					 * value and register a disjoint event
+					 * when we try soft-stopping */
+					if (js_devdata->nr_user_contexts_running
+							>= disjoint_threshold)
+						softstop_flags |=
+						JS_COMMAND_SW_CAUSES_DISJOINT;
+
+					kbase_job_slot_softstop_swflags(kbdev,
+						s, atom, softstop_flags);
+#endif
+				} else if (ticks == hard_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_ss ticks.
+					 * It should have been soft-stopped by
+					 * now. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks == gpu_reset_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_ss ticks.
+					 * It should have left the GPU by now.
+					 * Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#else				/* !CONFIG_MALI_JOB_DUMP */
+				/* NOTE: During CONFIG_MALI_JOB_DUMP, we use
+				 * the alternate timeouts, which makes the hard-
+				 * stop and GPU reset timeout much longer. We
+				 * also ensure that we don't soft-stop at all.
+				 */
+				if (ticks == js_devdata->soft_stop_ticks) {
+					/* Job has been scheduled for at least
+					 * js_devdata->soft_stop_ticks. We do
+					 * not soft-stop during
+					 * CONFIG_MALI_JOB_DUMP, however.
+					 */
+					dev_dbg(kbdev->dev, "Soft-stop");
+				} else if (ticks ==
+					js_devdata->hard_stop_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->hard_stop_ticks_dumping
+					 * ticks. Hard stop the slot.
+					 */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+					int ms =
+						js_devdata->scheduling_period_ns
+								/ 1000000u;
+					dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+							(unsigned long)ticks,
+							(unsigned long)ms);
+					kbase_job_slot_hardstop(atom->kctx, s,
+									atom);
+#endif
+				} else if (ticks ==
+					js_devdata->gpu_reset_ticks_dumping) {
+					/* Job has been scheduled for at least
+					 * js_devdata->gpu_reset_ticks_dumping
+					 * ticks. It should have left the GPU by
+					 * now. Signal that the GPU needs to be
+					 * reset.
+					 */
+					reset_needed = true;
+				}
+#endif				/* !CONFIG_MALI_JOB_DUMP */
+			}
+		}
+	}
+#if KBASE_GPU_RESET_EN
+	if (reset_needed) {
+		dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* the timer is re-issued if there is contexts in the run-pool */
+
+	if (backend->timer_running)
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+			HRTIMER_MODE_REL);
+
+	backend->timeouts_updated = false;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+	unsigned long flags;
+
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	if (!timer_callback_should_run(kbdev)) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = false;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/* From now on, return value of timer_callback_should_run() will
+		 * also cause the timer to not requeue itself. Its return value
+		 * cannot change, because it depends on variables updated with
+		 * the runpool_mutex held, which the caller of this must also
+		 * hold */
+		hrtimer_cancel(&backend->scheduling_timer);
+	}
+
+	if (timer_callback_should_run(kbdev) && !backend->timer_running) {
+		/* Take spinlock to force synchronisation with timer */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		backend->timer_running = true;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		hrtimer_start(&backend->scheduling_timer,
+			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+							HRTIMER_MODE_REL);
+
+		KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u,
+									0u);
+	}
+}
+
+int kbase_backend_timer_init(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	backend->scheduling_timer.function = timer_callback;
+
+	backend->timer_running = false;
+
+	return 0;
+}
+
+void kbase_backend_timer_term(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	hrtimer_cancel(&backend->scheduling_timer);
+}
+
+void kbase_backend_timer_suspend(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = true;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timer_resume(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->suspend_timer = false;
+
+	kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->timeouts_updated = true;
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
new file mode 100644
index 0000000000000..6576e55d2e39d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
@@ -0,0 +1,74 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#ifndef _KBASE_JS_BACKEND_H_
+#define _KBASE_JS_BACKEND_H_
+
+/**
+ * kbase_backend_timer_init() - Initialise the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_backend_timer_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_term() - Terminate the JS scheduling timer
+ * @kbdev:	Device pointer
+ *
+ * This function should be called at driver termination
+ */
+void kbase_backend_timer_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling
+ *                               timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on suspend, after the active count has reached
+ * zero. This is required as the timer may have been started on job submission
+ * to the job scheduler, but before jobs are submitted to the GPU.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
+ *                              scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on resume. Note that is is not guaranteed to
+ * re-start the timer, only evalute whether it should be re-started.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_resume(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JS_BACKEND_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
new file mode 100644
index 0000000000000..3e9af7770e1a6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -0,0 +1,393 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/bitops.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_tlstream.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
+		u32 num_pages)
+{
+	u64 region;
+
+	/* can't lock a zero sized range */
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	region = pfn << PAGE_SHIFT;
+	/*
+	 * fls returns (given the ASSERT above):
+	 * 1 .. 32
+	 *
+	 * 10 + fls(num_pages)
+	 * results in the range (11 .. 42)
+	 */
+
+	/* gracefully handle num_pages being zero */
+	if (0 == num_pages) {
+		region |= 11;
+	} else {
+		u8 region_width;
+
+		region_width = 10 + fls(num_pages);
+		if (num_pages != (1ul << (region_width - 11))) {
+			/* not pow2, so must go up to the next pow2 */
+			region_width += 1;
+		}
+		KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
+		KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);
+		region |= region_width;
+	}
+
+	return region;
+}
+
+static int wait_ready(struct kbase_device *kbdev,
+		unsigned int as_nr)
+{
+	unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+	u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
+
+	/* Wait for the MMU status to indicate there is no active command, in
+	 * case one is pending. Do not log remaining register accesses. */
+	while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
+		val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
+
+	if (max_loops == 0) {
+		dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
+		return -1;
+	}
+
+	/* If waiting in loop was performed, log last read value. */
+	if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops)
+		kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
+
+	return 0;
+}
+
+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd)
+{
+	int status;
+
+	/* write AS_COMMAND when MMU is ready to accept another command */
+	status = wait_ready(kbdev, as_nr);
+	if (status == 0)
+		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd);
+
+	return status;
+}
+
+static void validate_protected_page_fault(struct kbase_device *kbdev)
+{
+	/* GPUs which support (native) protected mode shall not report page
+	 * fault addresses unless it has protected debug mode and protected
+	 * debug mode is turned on */
+	u32 protected_debug_mode = 0;
+
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
+		return;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+		protected_debug_mode = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS)) & GPU_DBGEN;
+	}
+
+	if (!protected_debug_mode) {
+		/* fault_addr should never be reported in protected mode.
+		 * However, we just continue by printing an error message */
+		dev_err(kbdev->dev, "Fault address reported in protected mode\n");
+	}
+}
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
+{
+	const int num_as = 16;
+	const int busfault_shift = MMU_PAGE_FAULT_FLAGS;
+	const int pf_shift = 0;
+	const unsigned long as_bit_mask = (1UL << num_as) - 1;
+	unsigned long flags;
+	u32 new_mask;
+	u32 tmp;
+
+	/* bus faults */
+	u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
+	/* page faults (note: Ignore ASes with both pf and bf) */
+	u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	/* remember current mask */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK));
+	/* mask interrupts for now */
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+
+	while (bf_bits | pf_bits) {
+		struct kbase_as *as;
+		int as_no;
+		struct kbase_context *kctx;
+
+		/*
+		 * the while logic ensures we have a bit set, no need to check
+		 * for not-found here
+		 */
+		as_no = ffs(bf_bits | pf_bits) - 1;
+		as = &kbdev->as[as_no];
+
+		/*
+		 * Refcount the kctx ASAP - it shouldn't disappear anyway, since
+		 * Bus/Page faults _should_ only occur whilst jobs are running,
+		 * and a job causing the Bus/Page fault shouldn't complete until
+		 * the MMU is updated
+		 */
+		kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
+
+		/* find faulting address */
+		as->fault_addr = kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_HI));
+		as->fault_addr <<= 32;
+		as->fault_addr |= kbase_reg_read(kbdev,
+						MMU_AS_REG(as_no,
+							AS_FAULTADDRESS_LO));
+
+		/* Mark the fault protected or not */
+		as->protected_mode = kbdev->protected_mode;
+
+		if (kbdev->protected_mode && as->fault_addr) {
+			/* check if address reporting is allowed */
+			validate_protected_page_fault(kbdev);
+		}
+
+		/* report the fault to debugfs */
+		kbase_as_fault_debugfs_new(kbdev, as_no);
+
+		/* record the fault status */
+		as->fault_status = kbase_reg_read(kbdev,
+						  MMU_AS_REG(as_no,
+							AS_FAULTSTATUS));
+
+		/* find the fault type */
+		as->fault_type = (bf_bits & (1 << as_no)) ?
+				KBASE_MMU_FAULT_TYPE_BUS :
+				KBASE_MMU_FAULT_TYPE_PAGE;
+
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
+			as->fault_extra_addr = kbase_reg_read(kbdev,
+					MMU_AS_REG(as_no, AS_FAULTEXTRA_HI));
+			as->fault_extra_addr <<= 32;
+			as->fault_extra_addr |= kbase_reg_read(kbdev,
+					MMU_AS_REG(as_no, AS_FAULTEXTRA_LO));
+		}
+
+		if (kbase_as_has_bus_fault(as)) {
+			/* Mark bus fault as handled.
+			 * Note that a bus fault is processed first in case
+			 * where both a bus fault and page fault occur.
+			 */
+			bf_bits &= ~(1UL << as_no);
+
+			/* remove the queued BF (and PF) from the mask */
+			new_mask &= ~(MMU_BUS_ERROR(as_no) |
+					MMU_PAGE_FAULT(as_no));
+		} else {
+			/* Mark page fault as handled */
+			pf_bits &= ~(1UL << as_no);
+
+			/* remove the queued PF from the mask */
+			new_mask &= ~MMU_PAGE_FAULT(as_no);
+		}
+
+		/* Process the interrupt for this address space */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_mmu_interrupt_process(kbdev, kctx, as);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+
+	/* reenable interrupts */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+	tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK));
+	new_mask |= tmp;
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask);
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
+{
+	struct kbase_mmu_setup *current_setup = &as->current_setup;
+	u32 transcfg = 0;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
+		transcfg = current_setup->transcfg & 0xFFFFFFFFUL;
+
+		/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */
+		/* Clear PTW_MEMATTR bits */
+		transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
+		/* Enable correct PTW_MEMATTR bits */
+		transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
+
+		if (kbdev->system_coherency == COHERENCY_ACE) {
+			/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */
+			/* Clear PTW_SH bits */
+			transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
+			/* Enable correct PTW_SH bits */
+			transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
+		}
+
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
+				transcfg);
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+				(current_setup->transcfg >> 32) & 0xFFFFFFFFUL);
+	} else {
+		if (kbdev->system_coherency == COHERENCY_ACE)
+			current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
+	}
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
+			current_setup->transtab & 0xFFFFFFFFUL);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
+			(current_setup->transtab >> 32) & 0xFFFFFFFFUL);
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
+			current_setup->memattr & 0xFFFFFFFFUL);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
+			(current_setup->memattr >> 32) & 0xFFFFFFFFUL);
+
+	KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as,
+			current_setup->transtab,
+			current_setup->memattr,
+			transcfg);
+
+	write_cmd(kbdev, as->number, AS_COMMAND_UPDATE);
+}
+
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		u64 vpfn, u32 nr, u32 op,
+		unsigned int handling_irq)
+{
+	int ret;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+	if (op == AS_COMMAND_UNLOCK) {
+		/* Unlock doesn't require a lock first */
+		ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
+	} else {
+		u64 lock_addr = lock_region(kbdev, vpfn, nr);
+
+		/* Lock the region that needs to be updated */
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
+				lock_addr & 0xFFFFFFFFUL);
+		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
+				(lock_addr >> 32) & 0xFFFFFFFFUL);
+		write_cmd(kbdev, as->number, AS_COMMAND_LOCK);
+
+		/* Run the MMU operation */
+		write_cmd(kbdev, as->number, op);
+
+		/* Wait for the flush to complete */
+		ret = wait_ready(kbdev, as->number);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
+			/* Issue an UNLOCK command to ensure that valid page
+			   tables are re-read by the GPU after an update.
+			   Note that, the FLUSH command should perform all the
+			   actions necessary, however the bus logs show that if
+			   multiple page faults occur within an 8 page region
+			   the MMU does not always re-read the updated page
+			   table entries for later faults or is only partially
+			   read, it subsequently raises the page fault IRQ for
+			   the same addresses, the unlock ensures that the MMU
+			   cache is flushed, so updates can be re-read.  As the
+			   region is now unlocked we need to issue 2 UNLOCK
+			   commands in order to flush the MMU/uTLB,
+			   see PRLAM-8812.
+			 */
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
+			write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
+		}
+	}
+
+	return ret;
+}
+
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 pf_bf_mask;
+
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	/* Clear the page (and bus fault IRQ as well in case one occurred) */
+	pf_bf_mask = MMU_PAGE_FAULT(as->number);
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		pf_bf_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		enum kbase_mmu_fault_type type)
+{
+	unsigned long flags;
+	u32 irq_mask;
+
+	/* Enable the page fault IRQ (and bus fault IRQ as well in case one
+	 * occurred) */
+	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+	/*
+	 * A reset is in-flight and we're flushing the IRQ + bottom half
+	 * so don't update anything as it could race with the reset code.
+	 */
+	if (kbdev->irq_reset_flush)
+		goto unlock;
+
+	irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)) |
+			MMU_PAGE_FAULT(as->number);
+
+	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
+			type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
+		irq_mask |= MMU_BUS_ERROR(as->number);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask);
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
new file mode 100644
index 0000000000000..1f76eeda2324a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Interface file for the direct implementation for MMU hardware access
+ *
+ * Direct MMU hardware interface
+ *
+ * This module provides the interface(s) that are required by the direct
+ * register access implementation of the MMU hardware interface
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_
+#define _MALI_KBASE_MMU_HW_DIRECT_H_
+
+#include <mali_kbase_defs.h>
+
+/**
+ * kbase_mmu_interrupt - Process an MMU interrupt.
+ *
+ * Process the MMU interrupt that was reported by the &kbase_device.
+ *
+ * @kbdev:          kbase context to clear the fault from.
+ * @irq_stat:       Value of the MMU_IRQ_STATUS register
+ */
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+#endif	/* _MALI_KBASE_MMU_HW_DIRECT_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
new file mode 100644
index 0000000000000..51a10a231df08
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
@@ -0,0 +1,68 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static bool always_on_shaders_needed(struct kbase_device *kbdev)
+{
+	return true;
+}
+
+static bool always_on_get_core_active(struct kbase_device *kbdev)
+{
+	return true;
+}
+
+static void always_on_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void always_on_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
+	"always_on",			/* name */
+	always_on_init,			/* init */
+	always_on_term,			/* term */
+	always_on_shaders_needed,	/* shaders_needed */
+	always_on_get_core_active,	/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_ALWAYS_ON,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
new file mode 100644
index 0000000000000..e7927cf82e5a7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
@@ -0,0 +1,81 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_ALWAYS_ON_H
+#define MALI_KBASE_PM_ALWAYS_ON_H
+
+/**
+ * DOC:
+ * The "Always on" power management policy has the following
+ * characteristics:
+ *
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *    Shader Cores are powered up, regardless of whether or not they will be
+ *    needed later.
+ *
+ * - When KBase indicates that Shader Cores are needed to submit the currently
+ *   queued Job Chains:
+ *    Shader Cores are kept powered, regardless of whether or not they will be
+ *    needed
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *    The Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed. The GPU itself is also kept powered, even though it is not
+ *    needed.
+ *
+ * This policy is automatically overridden during system suspend: the desired
+ * core state is ignored, and the cores are forced off regardless of what the
+ * policy requests. After resuming from suspend, new changes to the desired
+ * core state made by the policy are honored.
+ *
+ * Note:
+ *
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_always_on - Private struct for policy instance data
+ * @dummy: unused dummy variable
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_always_on {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops;
+
+#endif /* MALI_KBASE_PM_ALWAYS_ON_H */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
new file mode 100644
index 0000000000000..a448a3b680b8f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
@@ -0,0 +1,483 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * GPU backend implementation of base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <mali_kbase_pm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data);
+
+int kbase_pm_runtime_init(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+	if (callbacks) {
+		kbdev->pm.backend.callback_power_on =
+					callbacks->power_on_callback;
+		kbdev->pm.backend.callback_power_off =
+					callbacks->power_off_callback;
+		kbdev->pm.backend.callback_power_suspend =
+					callbacks->power_suspend_callback;
+		kbdev->pm.backend.callback_power_resume =
+					callbacks->power_resume_callback;
+		kbdev->pm.callback_power_runtime_init =
+					callbacks->power_runtime_init_callback;
+		kbdev->pm.callback_power_runtime_term =
+					callbacks->power_runtime_term_callback;
+		kbdev->pm.backend.callback_power_runtime_on =
+					callbacks->power_runtime_on_callback;
+		kbdev->pm.backend.callback_power_runtime_off =
+					callbacks->power_runtime_off_callback;
+		kbdev->pm.backend.callback_power_runtime_idle =
+					callbacks->power_runtime_idle_callback;
+
+		if (callbacks->power_runtime_init_callback)
+			return callbacks->power_runtime_init_callback(kbdev);
+		else
+			return 0;
+	}
+
+	kbdev->pm.backend.callback_power_on = NULL;
+	kbdev->pm.backend.callback_power_off = NULL;
+	kbdev->pm.backend.callback_power_suspend = NULL;
+	kbdev->pm.backend.callback_power_resume = NULL;
+	kbdev->pm.callback_power_runtime_init = NULL;
+	kbdev->pm.callback_power_runtime_term = NULL;
+	kbdev->pm.backend.callback_power_runtime_on = NULL;
+	kbdev->pm.backend.callback_power_runtime_off = NULL;
+	kbdev->pm.backend.callback_power_runtime_idle = NULL;
+
+	return 0;
+}
+
+void kbase_pm_runtime_term(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.callback_power_runtime_term) {
+		kbdev->pm.callback_power_runtime_term(kbdev);
+	}
+}
+
+void kbase_pm_register_access_enable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_on_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = true;
+}
+
+void kbase_pm_register_access_disable(struct kbase_device *kbdev)
+{
+	struct kbase_pm_callback_conf *callbacks;
+
+	callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+	if (callbacks)
+		callbacks->power_off_callback(kbdev);
+
+	kbdev->pm.backend.gpu_powered = false;
+}
+
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_init(&kbdev->pm.lock);
+
+	kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!kbdev->pm.backend.gpu_poweroff_wait_wq)
+		return -ENOMEM;
+
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work,
+			kbase_pm_gpu_poweroff_wait_wq);
+
+	kbdev->pm.backend.gpu_powered = false;
+	kbdev->pm.suspending = false;
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->pm.backend.driver_ready_for_irqs = false;
+#endif /* CONFIG_MALI_DEBUG */
+	kbdev->pm.backend.gpu_in_desired_state = true;
+	init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
+
+	/* Initialise the metrics subsystem */
+	ret = kbasep_pm_metrics_init(kbdev);
+	if (ret)
+		return ret;
+
+	init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait);
+	kbdev->pm.backend.l2_powered = 0;
+
+	init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
+	kbdev->pm.backend.reset_done = false;
+
+	init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
+	kbdev->pm.active_count = 0;
+
+	spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
+	spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
+
+	init_waitqueue_head(&kbdev->pm.backend.poweroff_wait);
+
+	if (kbase_pm_ca_init(kbdev) != 0)
+		goto workq_fail;
+
+	if (kbase_pm_policy_init(kbdev) != 0)
+		goto pm_policy_fail;
+
+	return 0;
+
+pm_policy_fail:
+	kbase_pm_ca_term(kbdev);
+workq_fail:
+	kbasep_pm_metrics_term(kbdev);
+	return -EINVAL;
+}
+
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Turn clocks and interrupts on - no-op if we haven't done a previous
+	 * kbase_pm_clock_off() */
+	kbase_pm_clock_on(kbdev, is_resume);
+
+	/* Update core status as required by the policy */
+	kbase_pm_update_cores_state(kbdev);
+
+	/* NOTE: We don't wait to reach the desired state, since running atoms
+	 * will wait for that state to be reached anyway */
+}
+
+static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device,
+			pm.backend.gpu_poweroff_wait_work);
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+#if !PLATFORM_POWER_DOWN_ONLY
+	/* Wait for power transitions to complete. We do this with no locks held
+	 * so that we don't deadlock with any pending workqueues */
+	kbase_pm_check_transitions_sync(kbdev);
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+#if PLATFORM_POWER_DOWN_ONLY
+	if (kbdev->pm.backend.gpu_powered) {
+		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/* If L2 cache is powered then we must flush it before
+			 * we power off the GPU. Normally this would have been
+			 * handled when the L2 was powered off. */
+			kbase_gpu_cacheclean(kbdev);
+		}
+	}
+#endif /* PLATFORM_POWER_DOWN_ONLY */
+
+	if (!backend->poweron_required) {
+#if !PLATFORM_POWER_DOWN_ONLY
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		WARN_ON(kbdev->l2_available_bitmap ||
+				kbdev->shader_available_bitmap ||
+				kbdev->tiler_available_bitmap);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+
+		/* Disable interrupts and turn the clock off */
+		if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
+			/*
+			 * Page/bus faults are pending, must drop locks to
+			 * process.  Interrupts are disabled so no more faults
+			 * should be generated at this point.
+			 */
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			kbase_flush_mmu_wqs(kbdev);
+			mutex_lock(&js_devdata->runpool_mutex);
+			mutex_lock(&kbdev->pm.lock);
+
+			/* Turn off clock now that fault have been handled. We
+			 * dropped locks so poweron_required may have changed -
+			 * power back on if this is the case (effectively only
+			 * re-enabling of the interrupts would be done in this
+			 * case, as the clocks to GPU were not withdrawn yet).
+			 */
+			if (backend->poweron_required)
+				kbase_pm_clock_on(kbdev, false);
+			else
+				WARN_ON(!kbase_pm_clock_off(kbdev,
+						backend->poweroff_is_suspend));
+		}
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	backend->poweroff_wait_in_progress = false;
+	if (backend->poweron_required) {
+		backend->poweron_required = false;
+		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_backend_slot_update(kbdev);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	wake_up(&kbdev->pm.backend.poweroff_wait);
+}
+
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (!kbdev->pm.backend.poweroff_wait_in_progress) {
+		/* Force all cores off */
+		kbdev->pm.backend.desired_shader_state = 0;
+		kbdev->pm.backend.desired_tiler_state = 0;
+
+		/* Force all cores to be unavailable, in the situation where
+		 * transitions are in progress for some cores but not others,
+		 * and kbase_pm_check_transitions_nolock can not immediately
+		 * power off the cores */
+		kbdev->shader_available_bitmap = 0;
+		kbdev->tiler_available_bitmap = 0;
+		kbdev->l2_available_bitmap = 0;
+
+		kbdev->pm.backend.poweroff_wait_in_progress = true;
+		kbdev->pm.backend.poweroff_is_suspend = is_suspend;
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		/*Kick off wq here. Callers will have to wait*/
+		queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
+				&kbdev->pm.backend.gpu_poweroff_wait_work);
+	} else {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+}
+
+static bool is_poweroff_in_progress(struct kbase_device *kbdev)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = (kbdev->pm.backend.poweroff_wait_in_progress == false);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev)
+{
+	wait_event_killable(kbdev->pm.backend.poweroff_wait,
+			is_poweroff_in_progress(kbdev));
+}
+
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long irq_flags;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* A suspend won't happen during startup/insmod */
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	/* Power up the GPU, don't enable IRQs as we are not ready to receive
+	 * them. */
+	ret = kbase_pm_init_hw(kbdev, flags);
+	if (ret) {
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		return ret;
+	}
+
+	kbasep_pm_init_core_use_bitmaps(kbdev);
+
+	kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
+			kbdev->pm.debug_core_mask[1] =
+			kbdev->pm.debug_core_mask[2] =
+			kbdev->gpu_props.props.raw_props.shader_present;
+
+	/* Pretend the GPU is active to prevent a power policy turning the GPU
+	 * cores off */
+	kbdev->pm.active_count = 1;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+	/* Ensure cycle counter is off */
+	kbdev->pm.backend.gpu_cycle_counter_requests = 0;
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+	/* We are ready to receive IRQ's now as power policy is set up, so
+	 * enable them now. */
+#ifdef CONFIG_MALI_DEBUG
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+	kbdev->pm.backend.driver_ready_for_irqs = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+#endif
+	kbase_pm_enable_interrupts(kbdev);
+
+	/* Turn on the GPU and any cores needed by the policy */
+	kbase_pm_do_poweron(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Idle the GPU and/or cores, if the policy wants it to */
+	kbase_pm_context_idle(kbdev);
+
+	return 0;
+}
+
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&kbdev->pm.lock);
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	kbase_pm_do_poweroff(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
+
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
+
+	/* Free any resources the policy allocated */
+	kbase_pm_policy_term(kbdev);
+	kbase_pm_ca_term(kbdev);
+
+	/* Shut down the metrics subsystem */
+	kbasep_pm_metrics_term(kbdev);
+
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq);
+}
+
+void kbase_pm_power_changed(struct kbase_device *kbdev)
+{
+	bool cores_are_available;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+
+	if (cores_are_available)
+		kbase_backend_slot_update(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2)
+{
+	kbdev->pm.debug_core_mask[0] = new_core_mask_js0;
+	kbdev->pm.debug_core_mask[1] = new_core_mask_js1;
+	kbdev->pm.debug_core_mask[2] = new_core_mask_js2;
+	kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 |
+			new_core_mask_js2;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
+{
+	kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	/* Force power off the GPU and all cores (regardless of policy), only
+	 * after the PM active count reaches zero (otherwise, we risk turning it
+	 * off prematurely) */
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	kbase_pm_do_poweroff(kbdev, true);
+
+	kbase_backend_timer_suspend(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+}
+
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	kbdev->pm.suspending = false;
+	kbase_pm_do_poweron(kbdev, true);
+
+	kbase_backend_timer_resume(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
new file mode 100644
index 0000000000000..d4e8e42c6360f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -0,0 +1,107 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+int kbase_pm_ca_init(struct kbase_device *kbdev)
+{
+	struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
+#ifdef CONFIG_MALI_DEVFREQ
+	if (kbdev->current_core_mask)
+		pm_backend->ca_cores_enabled = kbdev->current_core_mask;
+	else
+		pm_backend->ca_cores_enabled =
+				kbdev->gpu_props.props.raw_props.shader_present;
+#endif
+	pm_backend->ca_in_transition = false;
+
+	return 0;
+}
+
+void kbase_pm_ca_term(struct kbase_device *kbdev)
+{
+}
+
+#ifdef CONFIG_MALI_DEVFREQ
+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
+{
+	struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	pm_backend->ca_cores_enabled = core_mask;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n",
+			pm_backend->ca_cores_enabled);
+}
+#endif
+
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
+{
+	struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* All cores must be enabled when instrumentation is in use */
+	if (pm_backend->instr_enabled)
+		return kbdev->gpu_props.props.raw_props.shader_present &
+				kbdev->pm.debug_core_mask_all;
+
+#ifdef CONFIG_MALI_DEVFREQ
+	return pm_backend->ca_cores_enabled & kbdev->pm.debug_core_mask_all;
+#else
+	return kbdev->gpu_props.props.raw_props.shader_present &
+			kbdev->pm.debug_core_mask_all;
+#endif
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
+
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.instr_enabled = true;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	kbdev->pm.backend.instr_enabled = false;
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
new file mode 100644
index 0000000000000..2b005c9fe4e3b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
@@ -0,0 +1,97 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#ifndef _KBASE_PM_CA_H_
+#define _KBASE_PM_CA_H_
+
+/**
+ * kbase_pm_ca_init - Initialize core availability framework
+ *
+ * Must be called before calling any other core availability function
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the core availability framework was successfully initialized,
+ *         -errno otherwise
+ */
+int kbase_pm_ca_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_term - Terminate core availability framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_ca_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_get_core_mask - Get currently available shaders core mask
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Returns a mask of the currently available shader cores.
+ * Calls into the core availability policy
+ *
+ * Return: The bit mask of available cores
+ */
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_update_core_status - Update core status
+ *
+ * @kbdev:               The kbase device structure for the device (must be
+ *                       a valid pointer)
+ * @cores_ready:         The bit mask of cores ready for job submission
+ * @cores_transitioning: The bit mask of cores that are transitioning power
+ *                       state
+ *
+ * Update core availability policy with current core power status
+ *
+ * Calls into the core availability policy
+ */
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+/**
+ * kbase_pm_ca_instr_enable - Enable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This overrides the output of the core availability policy, ensuring that all
+ * cores are available
+ */
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_instr_disable - Disable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This disables any previously enabled override, and resumes normal policy
+ * functionality
+ */
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_CA_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h
new file mode 100644
index 0000000000000..f67ec650c9814
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h
@@ -0,0 +1,60 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * A core availability policy for use with devfreq, where core masks are
+ * associated with OPPs.
+ */
+
+#ifndef MALI_KBASE_PM_CA_DEVFREQ_H
+#define MALI_KBASE_PM_CA_DEVFREQ_H
+
+/**
+ * struct kbasep_pm_ca_policy_devfreq - Private structure for devfreq ca policy
+ *
+ * This contains data that is private to the devfreq core availability
+ * policy.
+ *
+ * @cores_desired: Cores that the policy wants to be available
+ * @cores_enabled: Cores that the policy is currently returning as available
+ * @cores_used: Cores currently powered or transitioning
+ */
+struct kbasep_pm_ca_policy_devfreq {
+	u64 cores_desired;
+	u64 cores_enabled;
+	u64 cores_used;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops;
+
+/**
+ * kbase_devfreq_set_core_mask - Set core mask for policy to use
+ * @kbdev: Device pointer
+ * @core_mask: New core mask
+ *
+ * The new core mask will have immediate effect if the GPU is powered, or will
+ * take effect when it is next powered on.
+ */
+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask);
+
+#endif /* MALI_KBASE_PM_CA_DEVFREQ_H */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
new file mode 100644
index 0000000000000..e90c44def25e6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static bool coarse_demand_shaders_needed(struct kbase_device *kbdev)
+{
+	return kbase_pm_is_active(kbdev);
+}
+
+static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
+{
+	return kbase_pm_is_active(kbdev);
+}
+
+static void coarse_demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void coarse_demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/* The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
+	"coarse_demand",			/* name */
+	coarse_demand_init,			/* init */
+	coarse_demand_term,			/* term */
+	coarse_demand_shaders_needed,		/* shaders_needed */
+	coarse_demand_get_core_active,		/* get_core_active */
+	0u,					/* flags */
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
new file mode 100644
index 0000000000000..304e5d7fa32da
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * "Coarse Demand" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
+#define MALI_KBASE_PM_COARSE_DEMAND_H
+
+/**
+ * DOC:
+ * The "Coarse" demand power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - Shader Cores are powered up, regardless of whether or not they will be
+ *    needed later.
+ * - When KBase indicates that Shader Cores are needed to submit the currently
+ *   queued Job Chains:
+ *  - Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand
+ *                                         policy
+ *
+ * This contains data that is private to the coarse demand power policy.
+ *
+ * @dummy: Dummy member - no state needed
+ */
+struct kbasep_pm_policy_coarse_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
new file mode 100644
index 0000000000000..7fe8eb3cc3a2c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -0,0 +1,412 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Backend-specific Power Manager definitions
+ */
+
+#ifndef _KBASE_PM_HWACCESS_DEFS_H_
+#define _KBASE_PM_HWACCESS_DEFS_H_
+
+#include "mali_kbase_pm_always_on.h"
+#include "mali_kbase_pm_coarse_demand.h"
+#include "mali_kbase_pm_demand.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_demand_always_powered.h"
+#include "mali_kbase_pm_fast_start.h"
+#endif
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+struct kbase_jd_atom;
+
+/**
+ * enum kbase_pm_core_type - The types of core in a GPU.
+ *
+ * These enumerated values are used in calls to
+ * - kbase_pm_get_present_cores()
+ * - kbase_pm_get_active_cores()
+ * - kbase_pm_get_trans_cores()
+ * - kbase_pm_get_ready_cores().
+ *
+ * They specify which type of core should be acted on.  These values are set in
+ * a manner that allows core_type_to_reg() function to be simpler and more
+ * efficient.
+ *
+ * @KBASE_PM_CORE_L2: The L2 cache
+ * @KBASE_PM_CORE_SHADER: Shader cores
+ * @KBASE_PM_CORE_TILER: Tiler cores
+ * @KBASE_PM_CORE_STACK: Core stacks
+ */
+enum kbase_pm_core_type {
+	KBASE_PM_CORE_L2 = L2_PRESENT_LO,
+	KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO,
+	KBASE_PM_CORE_TILER = TILER_PRESENT_LO,
+	KBASE_PM_CORE_STACK = STACK_PRESENT_LO
+};
+
+/**
+ * struct kbasep_pm_metrics - Metrics data collected for use by the power
+ *                            management framework.
+ *
+ *  @time_busy: number of ns the GPU was busy executing jobs since the
+ *          @time_period_start timestamp.
+ *  @time_idle: number of ns since time_period_start the GPU was not executing
+ *          jobs since the @time_period_start timestamp.
+ *  @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
+ *           if two CL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ *  @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
+ *           if two GL jobs were active for 400ns, this value would be updated
+ *           with 800.
+ */
+struct kbasep_pm_metrics {
+	u32 time_busy;
+	u32 time_idle;
+	u32 busy_cl[2];
+	u32 busy_gl;
+};
+
+/**
+ * struct kbasep_pm_metrics_state - State required to collect the metrics in
+ *                                  struct kbasep_pm_metrics
+ *  @time_period_start: time at which busy/idle measurements started
+ *  @gpu_active: true when the GPU is executing jobs. false when
+ *           not. Updated when the job scheduler informs us a job in submitted
+ *           or removed from a GPU slot.
+ *  @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
+ *  @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As
+ *           GL jobs never run on slot 2 this slot is not recorded.
+ *  @lock: spinlock protecting the kbasep_pm_metrics_data structure
+ *  @platform_data: pointer to data controlled by platform specific code
+ *  @kbdev: pointer to kbase device for which metrics are collected
+ *  @values: The current values of the power management metrics. The
+ *           kbase_pm_get_dvfs_metrics() function is used to compare these
+ *           current values with the saved values from a previous invocation.
+ *  @timer: timer to regularly make DVFS decisions based on the power
+ *           management metrics.
+ *  @timer_active: boolean indicating @timer is running
+ *  @dvfs_last: values of the PM metrics from the last DVFS tick
+ *  @dvfs_diff: different between the current and previous PM metrics.
+ */
+struct kbasep_pm_metrics_state {
+	ktime_t time_period_start;
+	bool gpu_active;
+	u32 active_cl_ctx[2];
+	u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */
+	spinlock_t lock;
+
+	void *platform_data;
+	struct kbase_device *kbdev;
+
+	struct kbasep_pm_metrics values;
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	struct hrtimer timer;
+	bool timer_active;
+	struct kbasep_pm_metrics dvfs_last;
+	struct kbasep_pm_metrics dvfs_diff;
+#endif
+};
+
+union kbase_pm_policy_data {
+	struct kbasep_pm_policy_always_on always_on;
+	struct kbasep_pm_policy_coarse_demand coarse_demand;
+	struct kbasep_pm_policy_demand demand;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_policy_demand_always_powered demand_always_powered;
+	struct kbasep_pm_policy_fast_start fast_start;
+#endif
+};
+
+/**
+ * struct kbase_pm_backend_data - Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ *
+ * @pm_current_policy: The policy that is currently actively controlling the
+ *                     power state.
+ * @pm_policy_data:    Private data for current PM policy
+ * @ca_in_transition:  Flag indicating when core availability policy is
+ *                     transitioning cores. The core availability policy must
+ *                     set this when a change in core availability is occurring.
+ *                     power_change_lock must be held when accessing this.
+ * @reset_done:        Flag when a reset is complete
+ * @reset_done_wait:   Wait queue to wait for changes to @reset_done
+ * @l2_powered_wait:   Wait queue for whether the l2 cache has been powered as
+ *                     requested
+ * @l2_powered:        State indicating whether all the l2 caches are powered.
+ *                     Non-zero indicates they're *all* powered
+ *                     Zero indicates that some (or all) are not powered
+ * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter
+ *                              users
+ * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests
+ * @desired_shader_state: A bit mask identifying the shader cores that the
+ *                        power policy would like to be on. The current state
+ *                        of the cores may be different, but there should be
+ *                        transitions in progress that will eventually achieve
+ *                        this state (assuming that the policy doesn't change
+ *                        its mind in the mean time).
+ * @powering_on_shader_state: A bit mask indicating which shader cores are
+ *                            currently in a power-on transition
+ * @desired_tiler_state: A bit mask identifying the tiler cores that the power
+ *                       policy would like to be on. See @desired_shader_state
+ * @powering_on_tiler_state: A bit mask indicating which tiler core are
+ *                           currently in a power-on transition
+ * @powering_on_l2_state: A bit mask indicating which l2-caches are currently
+ *                        in a power-on transition
+ * @powering_on_stack_state: A bit mask indicating which core stacks are
+ *                           currently in a power-on transition
+ * @gpu_in_desired_state: This flag is set if the GPU is powered as requested
+ *                        by the desired_xxx_state variables
+ * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0
+ * @gpu_powered:       Set to true when the GPU is powered and register
+ *                     accesses are possible, false otherwise
+ * @instr_enabled:     Set to true when instrumentation is enabled,
+ *                     false otherwise
+ * @cg1_disabled:      Set if the policy wants to keep the second core group
+ *                     powered off
+ * @driver_ready_for_irqs: Debug state indicating whether sufficient
+ *                         initialization of the driver has occurred to handle
+ *                         IRQs
+ * @gpu_powered_lock:  Spinlock that must be held when writing @gpu_powered or
+ *                     accessing @driver_ready_for_irqs
+ * @metrics:           Structure to hold metrics for the GPU
+ * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is
+ *                        powered off
+ * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders
+ *                        and/or timers are powered off
+ * @gpu_poweroff_timer: Timer for powering off GPU
+ * @gpu_poweroff_wq:   Workqueue to power off GPU on when timer fires
+ * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq
+ * @shader_poweroff_pending: Bit mask of shaders to be powered off on next
+ *                           timer callback
+ * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer
+ *                          callback
+ * @poweroff_timer_needed: true if the poweroff timer is currently required,
+ *                         false otherwise
+ * @poweroff_timer_running: true if the poweroff timer is currently running,
+ *                          false otherwise
+ *                          power_change_lock should be held when accessing,
+ *                          unless there is no way the timer can be running (eg
+ *                          hrtimer_cancel() was called immediately before)
+ * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress.
+ *                             hwaccess_lock must be held when accessing
+ * @poweron_required: true if a GPU power on is required. Should only be set
+ *                    when poweroff_wait_in_progress is true, and therefore the
+ *                    GPU can not immediately be powered on. pm.lock must be
+ *                    held when accessing
+ * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend
+ *                       request. pm.lock must be held when accessing
+ * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off
+ * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq
+ * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete
+ * @callback_power_on: Callback when the GPU needs to be turned on. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_off: Callback when the GPU may be turned off. See
+ *                     &struct kbase_pm_callback_conf
+ * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to
+ *                          be turned off. See &struct kbase_pm_callback_conf
+ * @callback_power_resume: Callback when a resume occurs and the GPU needs to
+ *                          be turned on. See &struct kbase_pm_callback_conf
+ * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See
+ *                             &struct kbase_pm_callback_conf
+ * @callback_power_runtime_off: Callback when the GPU may be turned off. See
+ *                              &struct kbase_pm_callback_conf
+ * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
+ *                              &struct kbase_pm_callback_conf
+ * @ca_cores_enabled: Cores that are currently available
+ *
+ * Note:
+ * During an IRQ, @pm_current_policy can be NULL when the policy is being
+ * changed with kbase_pm_set_policy(). The change is protected under
+ * kbase_device.pm.power_change_lock. Direct access to this from IRQ context
+ * must therefore check for NULL. If NULL, then kbase_pm_set_policy() will
+ * re-issue the policy functions that would have been done under IRQ.
+ */
+struct kbase_pm_backend_data {
+	const struct kbase_pm_policy *pm_current_policy;
+	union kbase_pm_policy_data pm_policy_data;
+	bool ca_in_transition;
+	bool reset_done;
+	wait_queue_head_t reset_done_wait;
+	wait_queue_head_t l2_powered_wait;
+	int l2_powered;
+	int gpu_cycle_counter_requests;
+	spinlock_t gpu_cycle_counter_requests_lock;
+
+	u64 desired_shader_state;
+	u64 powering_on_shader_state;
+	u64 desired_tiler_state;
+	u64 powering_on_tiler_state;
+	u64 powering_on_l2_state;
+#ifdef CONFIG_MALI_CORESTACK
+	u64 powering_on_stack_state;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	bool gpu_in_desired_state;
+	wait_queue_head_t gpu_in_desired_state_wait;
+
+	bool gpu_powered;
+
+	bool instr_enabled;
+
+	bool cg1_disabled;
+
+#ifdef CONFIG_MALI_DEBUG
+	bool driver_ready_for_irqs;
+#endif /* CONFIG_MALI_DEBUG */
+
+	spinlock_t gpu_powered_lock;
+
+
+	struct kbasep_pm_metrics_state metrics;
+
+	int gpu_poweroff_pending;
+	int shader_poweroff_pending_time;
+
+	struct hrtimer gpu_poweroff_timer;
+	struct workqueue_struct *gpu_poweroff_wq;
+	struct work_struct gpu_poweroff_work;
+
+	u64 shader_poweroff_pending;
+	u64 tiler_poweroff_pending;
+
+	bool poweroff_timer_needed;
+	bool poweroff_timer_running;
+
+	bool poweroff_wait_in_progress;
+	bool poweron_required;
+	bool poweroff_is_suspend;
+
+	struct workqueue_struct *gpu_poweroff_wait_wq;
+	struct work_struct gpu_poweroff_wait_work;
+
+	wait_queue_head_t poweroff_wait;
+
+	int (*callback_power_on)(struct kbase_device *kbdev);
+	void (*callback_power_off)(struct kbase_device *kbdev);
+	void (*callback_power_suspend)(struct kbase_device *kbdev);
+	void (*callback_power_resume)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_on)(struct kbase_device *kbdev);
+	void (*callback_power_runtime_off)(struct kbase_device *kbdev);
+	int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
+
+#ifdef CONFIG_MALI_DEVFREQ
+	u64 ca_cores_enabled;
+#endif
+};
+
+
+/* List of policy IDs */
+enum kbase_pm_policy_id {
+	KBASE_PM_POLICY_ID_DEMAND = 1,
+	KBASE_PM_POLICY_ID_ALWAYS_ON,
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,
+#if !MALI_CUSTOMER_RELEASE
+	KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
+	KBASE_PM_POLICY_ID_FAST_START
+#endif
+};
+
+typedef u32 kbase_pm_policy_flags;
+
+/**
+ * struct kbase_pm_policy - Power policy structure.
+ *
+ * Each power policy exposes a (static) instance of this structure which
+ * contains function pointers to the policy's methods.
+ *
+ * @name:               The name of this policy
+ * @init:               Function called when the policy is selected
+ * @term:               Function called when the policy is unselected
+ * @shaders_needed:     Function called to find out if shader cores are needed
+ * @get_core_active:    Function called to get the current overall GPU power
+ *                      state
+ * @flags:              Field indicating flags for this policy
+ * @id:                 Field indicating an ID for this policy. This is not
+ *                      necessarily the same as its index in the list returned
+ *                      by kbase_pm_list_policies().
+ *                      It is used purely for debugging.
+ */
+struct kbase_pm_policy {
+	char *name;
+
+	/**
+	 * Function called when the policy is selected
+	 *
+	 * This should initialize the kbdev->pm.pm_policy_data structure. It
+	 * should not attempt to make any changes to hardware state.
+	 *
+	 * It is undefined what state the cores are in when the function is
+	 * called.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*init)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called when the policy is unselected.
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to find out if shader cores are needed
+	 *
+	 * This needs to at least satisfy kbdev->shader_needed_cnt, and so must
+	 * never return false when kbdev->shader_needed_cnt > 0.
+	 *
+	 * Note that kbdev->pm.active_count being 0 is not a good indicator
+	 * that kbdev->shader_needed_cnt is also 0 - refer to the documentation
+	 * on the active_count member in struct kbase_pm_device_data and
+	 * kbase_pm_is_active().
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: true if shader cores are needed, false otherwise
+	 */
+	bool (*shaders_needed)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current overall GPU power state
+	 *
+	 * This function must meet or exceed the requirements for power
+	 * indicated by kbase_pm_is_active().
+	 *
+	 * @kbdev: The kbase device structure for the device (must be a
+	 *         valid pointer)
+	 *
+	 * Return: true if the GPU should be powered, false otherwise
+	 */
+	bool (*get_core_active)(struct kbase_device *kbdev);
+
+	kbase_pm_policy_flags flags;
+	enum kbase_pm_policy_id id;
+};
+
+#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
new file mode 100644
index 0000000000000..01727d698a4c2
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
@@ -0,0 +1,68 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static bool demand_shaders_needed(struct kbase_device *kbdev)
+{
+	return (kbdev->shader_needed_cnt > 0);
+}
+
+static bool demand_get_core_active(struct kbase_device *kbdev)
+{
+	return kbase_pm_is_active(kbdev);
+}
+
+static void demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_demand_policy_ops = {
+	"demand",			/* name */
+	demand_init,			/* init */
+	demand_term,			/* term */
+	demand_shaders_needed,		/* shaders_needed */
+	demand_get_core_active,		/* get_core_active */
+	0u,				/* flags */
+	KBASE_PM_POLICY_ID_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
new file mode 100644
index 0000000000000..4b05e6d8a63d5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * A simple demand based power management policy
+ */
+
+#ifndef MALI_KBASE_PM_DEMAND_H
+#define MALI_KBASE_PM_DEMAND_H
+
+/**
+ * DOC: Demand power management policy
+ *
+ * The demand power management policy has the following characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - The Shader Cores are not powered up
+ *
+ * - When KBase indicates that Shader Cores are needed to submit the currently
+ *   queued Job Chains:
+ *  - Shader Cores are powered up
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * Note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_demand - Private structure for policy instance data
+ *
+ * @dummy: No state is needed, a dummy variable
+ *
+ * This contains data that is private to the demand power policy.
+ */
+struct kbasep_pm_policy_demand {
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_DEMAND_H */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
new file mode 100644
index 0000000000000..68419f7cd7454
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
@@ -0,0 +1,1625 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base kernel Power Management hardware control
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_smc.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_ctx_sched.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <linux/of.h>
+
+#if MALI_MOCK_TEST
+#define MOCKABLE(function) function##_original
+#else
+#define MOCKABLE(function) function
+#endif				/* MALI_MOCK_TEST */
+
+/**
+ * enum kbasep_pm_action - Actions that can be performed on a core.
+ *
+ * This enumeration is private to the file. Its values are set to allow
+ * core_type_to_reg() function, which decodes this enumeration, to be simpler
+ * and more efficient.
+ *
+ * @ACTION_PRESENT: The cores that are present
+ * @ACTION_READY: The cores that are ready
+ * @ACTION_PWRON: Power on the cores specified
+ * @ACTION_PWROFF: Power off the cores specified
+ * @ACTION_PWRTRANS: The cores that are transitioning
+ * @ACTION_PWRACTIVE: The cores that are active
+ */
+enum kbasep_pm_action {
+	ACTION_PRESENT = 0,
+	ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO),
+	ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO),
+	ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO),
+	ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO),
+	ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO)
+};
+
+static u64 kbase_pm_get_state(
+		struct kbase_device *kbdev,
+		enum kbase_pm_core_type core_type,
+		enum kbasep_pm_action action);
+
+/**
+ * core_type_to_reg - Decode a core type and action to a register.
+ *
+ * Given a core type (defined by kbase_pm_core_type) and an action (defined
+ * by kbasep_pm_action) this function will return the register offset that
+ * will perform the action on the core type. The register returned is the _LO
+ * register and an offset must be applied to use the _HI register.
+ *
+ * @core_type: The type of core
+ * @action:    The type of action
+ *
+ * Return: The register offset of the _LO register that performs an action of
+ * type @action on a core of type @core_type.
+ */
+static u32 core_type_to_reg(enum kbase_pm_core_type core_type,
+						enum kbasep_pm_action action)
+{
+#ifdef CONFIG_MALI_CORESTACK
+	if (core_type == KBASE_PM_CORE_STACK) {
+		switch (action) {
+		case ACTION_PRESENT:
+			return STACK_PRESENT_LO;
+		case ACTION_READY:
+			return STACK_READY_LO;
+		case ACTION_PWRON:
+			return STACK_PWRON_LO;
+		case ACTION_PWROFF:
+			return STACK_PWROFF_LO;
+		case ACTION_PWRTRANS:
+			return STACK_PWRTRANS_LO;
+		default:
+			BUG();
+		}
+	}
+#endif /* CONFIG_MALI_CORESTACK */
+
+	return (u32)core_type + (u32)action;
+}
+
+#ifdef CONFIG_ARM64
+static void mali_cci_flush_l2(struct kbase_device *kbdev)
+{
+	const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED;
+	u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+	u32 raw;
+
+	/*
+	 * Note that we don't take the cache flush mutex here since
+	 * we expect to be the last user of the L2, all other L2 users
+	 * would have dropped their references, to initiate L2 power
+	 * down, L2 power down being the only valid place for this
+	 * to be called from.
+	 */
+
+	kbase_reg_write(kbdev,
+			GPU_CONTROL_REG(GPU_COMMAND),
+			GPU_COMMAND_CLEAN_INV_CACHES);
+
+	raw = kbase_reg_read(kbdev,
+		GPU_CONTROL_REG(GPU_IRQ_RAWSTAT));
+
+	/* Wait for cache flush to complete before continuing, exit on
+	 * gpu resets or loop expiry. */
+	while (((raw & mask) == 0) && --loops) {
+		raw = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(GPU_IRQ_RAWSTAT));
+	}
+}
+#endif
+
+/**
+ * kbase_pm_invoke - Invokes an action on a core set
+ *
+ * This function performs the action given by @action on a set of cores of a
+ * type given by @core_type. It is a static function used by
+ * kbase_pm_transition_core_type()
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the action should be performed on
+ * @cores:     A bit mask of cores to perform the action on (low 32 bits)
+ * @action:    The action to perform on the cores
+ */
+static void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo = cores & 0xFFFFFFFF;
+	u32 hi = (cores >> 32) & 0xFFFFFFFF;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	if (cores) {
+		if (action == ACTION_PWRON)
+			kbase_trace_mali_pm_power_on(core_type, cores);
+		else if (action == ACTION_PWROFF)
+			kbase_trace_mali_pm_power_off(core_type, cores);
+	}
+#endif
+
+	if (cores) {
+		u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY);
+
+		if (action == ACTION_PWRON)
+			state |= cores;
+		else if (action == ACTION_PWROFF)
+			state &= ~cores;
+		KBASE_TLSTREAM_AUX_PM_STATE(core_type, state);
+	}
+
+	/* Tracing */
+	if (cores) {
+		if (action == ACTION_PWRON)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u,
+									lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL,
+									0u, lo);
+				break;
+			default:
+				break;
+			}
+		else if (action == ACTION_PWROFF)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL,
+									0u, lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL,
+									0u, lo);
+				/* disable snoops before L2 is turned off */
+				kbase_pm_cache_snoop_disable(kbdev);
+				break;
+			default:
+				break;
+			}
+	}
+
+	if (lo != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo);
+
+	if (hi != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi);
+}
+
+/**
+ * kbase_pm_get_state - Get information about a core set
+ *
+ * This function gets information (chosen by @action) about a set of cores of
+ * a type given by @core_type. It is a static function used by
+ * kbase_pm_get_active_cores(), kbase_pm_get_trans_cores() and
+ * kbase_pm_get_ready_cores().
+ *
+ * @kbdev:     The kbase device structure of the device
+ * @core_type: The type of core that the should be queried
+ * @action:    The property of the cores to query
+ *
+ * Return: A bit mask specifying the state of the cores
+ */
+static u64 kbase_pm_get_state(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo, hi;
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+
+	lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg));
+	hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4));
+
+	return (((u64) hi) << 32) | ((u64) lo);
+}
+
+void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev)
+{
+	kbdev->shader_available_bitmap = 0;
+	kbdev->tiler_available_bitmap = 0;
+	kbdev->l2_users_count = 0;
+	kbdev->l2_available_bitmap = 0;
+	kbdev->tiler_needed_cnt = 0;
+	kbdev->shader_needed_cnt = 0;
+}
+
+/**
+ * kbase_pm_get_present_cores - Get the cores that are present
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of the cores that are present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (type) {
+	case KBASE_PM_CORE_L2:
+		return kbdev->gpu_props.props.raw_props.l2_present;
+	case KBASE_PM_CORE_SHADER:
+		return kbdev->gpu_props.props.raw_props.shader_present;
+	case KBASE_PM_CORE_TILER:
+		return kbdev->gpu_props.props.raw_props.tiler_present;
+	case KBASE_PM_CORE_STACK:
+		return kbdev->gpu_props.props.raw_props.stack_present;
+	default:
+		break;
+	}
+	KBASE_DEBUG_ASSERT(0);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores);
+
+/**
+ * kbase_pm_get_active_cores - Get the cores that are "active"
+ *                             (busy processing work)
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are active
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores);
+
+/**
+ * kbase_pm_get_trans_cores - Get the cores that are transitioning between
+ *                            power states
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are transitioning
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores);
+
+/**
+ * kbase_pm_get_ready_cores - Get the cores that are powered on
+ *
+ * @kbdev: Kbase device
+ * @type: The type of cores to query
+ *
+ * Return: Bitmask of cores that are ready (powered on)
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	u64 result;
+
+	result = kbase_pm_get_state(kbdev, type, ACTION_READY);
+
+	switch (type) {
+	case KBASE_PM_CORE_SHADER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_TILER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_L2:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	default:
+		break;
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores);
+
+/**
+ * kbase_pm_transition_core_type - Perform power transitions for a particular
+ *                                 core type.
+ *
+ * This function will perform any available power transitions to make the actual
+ * hardware state closer to the desired state. If a core is currently
+ * transitioning then changes to the power state of that call cannot be made
+ * until the transition has finished. Cores which are not present in the
+ * hardware are ignored if they are specified in the desired_state bitmask,
+ * however the return value will always be 0 in this case.
+ *
+ * @kbdev:             The kbase device
+ * @type:              The core type to perform transitions for
+ * @desired_state:     A bit mask of the desired state of the cores
+ * @in_use:            A bit mask of the cores that are currently running
+ *                     jobs. These cores have to be kept powered up because
+ *                     there are jobs running (or about to run) on them.
+ * @available:         Receives a bit mask of the cores that the job
+ *                     scheduler can use to submit jobs to. May be NULL if
+ *                     this is not needed.
+ * @powering_on:       Bit mask to update with cores that are
+ *                    transitioning to a power-on state.
+ *
+ * Return: true if the desired state has been reached, false otherwise
+ */
+static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type,
+						u64 desired_state,
+						u64 in_use,
+						u64 * const available,
+						u64 *powering_on)
+{
+	u64 present;
+	u64 ready;
+	u64 trans;
+	u64 powerup;
+	u64 powerdown;
+	u64 powering_on_trans;
+	u64 desired_state_in_use;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Get current state */
+	present = kbase_pm_get_present_cores(kbdev, type);
+	trans = kbase_pm_get_trans_cores(kbdev, type);
+	ready = kbase_pm_get_ready_cores(kbdev, type);
+
+	/* mask off ready from trans in case transitions finished between the
+	 * register reads */
+	trans &= ~ready;
+
+	powering_on_trans = trans & *powering_on;
+
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	if (trans) /* Do not progress if any cores are transitioning */
+		return false;
+
+	*powering_on = powering_on_trans;
+
+	/* Update desired state to include the in-use cores. These have to be
+	 * kept powered up because there are jobs running or about to run on
+	 * these cores
+	 */
+	desired_state_in_use = desired_state | in_use;
+
+	/* Update state of whether l2 caches are powered */
+	if (type == KBASE_PM_CORE_L2) {
+		if ((ready == present) && (desired_state_in_use == ready) &&
+								(trans == 0)) {
+			/* All are ready, none will be turned off, and none are
+			 * transitioning */
+			kbdev->pm.backend.l2_powered = 1;
+			/*
+			 * Ensure snoops are enabled after L2 is powered up,
+			 * note that kbase keeps track of the snoop state, so
+			 * safe to repeatedly call.
+			 */
+			kbase_pm_cache_snoop_enable(kbdev);
+			if (kbdev->l2_users_count > 0) {
+				/* Notify any registered l2 cache users
+				 * (optimized out when no users waiting) */
+				wake_up(&kbdev->pm.backend.l2_powered_wait);
+			}
+		} else
+			kbdev->pm.backend.l2_powered = 0;
+	}
+
+	if (desired_state == ready && (trans == 0))
+		return true;
+
+	/* Restrict the cores to those that are actually present */
+	powerup = desired_state_in_use & present;
+	powerdown = (~desired_state_in_use) & present;
+
+	/* Restrict to cores that are not already in the desired state */
+	powerup &= ~ready;
+	powerdown &= ready;
+
+	/* Don't transition any cores that are already transitioning, except for
+	 * Mali cores that support the following case:
+	 *
+	 * If the SHADER_PWRON or TILER_PWRON registers are written to turn on
+	 * a core that is currently transitioning to power off, then this is
+	 * remembered and the shader core is automatically powered up again once
+	 * the original transition completes. Once the automatic power on is
+	 * complete any job scheduled on the shader core should start.
+	 */
+	powerdown &= ~trans;
+
+	if (kbase_hw_has_feature(kbdev,
+				BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS))
+		if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type)
+			trans = powering_on_trans; /* for exception cases, only
+						    * mask off cores in power on
+						    * transitions */
+
+	powerup &= ~trans;
+
+	/* Perform transitions if any */
+	kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON);
+#if !PLATFORM_POWER_DOWN_ONLY
+	kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF);
+#endif
+
+	/* Recalculate cores transitioning on, and re-evaluate our state */
+	powering_on_trans |= powerup;
+	*powering_on = powering_on_trans;
+	if (available != NULL)
+		*available = (ready | powering_on_trans) & desired_state;
+
+	return false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type);
+
+/**
+ * get_desired_cache_status - Determine which caches should be on for a
+ *                            particular core state
+ *
+ * This function takes a bit mask of the present caches and the cores (or
+ * caches) that are attached to the caches that will be powered. It then
+ * computes which caches should be turned on to allow the cores requested to be
+ * powered up.
+ *
+ * @present:       The bit mask of present caches
+ * @cores_powered: A bit mask of cores (or L2 caches) that are desired to
+ *                 be powered
+ * @tilers_powered: The bit mask of tilers that are desired to be powered
+ *
+ * Return: A bit mask of the caches that should be turned on
+ */
+static u64 get_desired_cache_status(u64 present, u64 cores_powered,
+		u64 tilers_powered)
+{
+	u64 desired = 0;
+
+	while (present) {
+		/* Find out which is the highest set bit */
+		u64 bit = fls64(present) - 1;
+		u64 bit_mask = 1ull << bit;
+		/* Create a mask which has all bits from 'bit' upwards set */
+
+		u64 mask = ~(bit_mask - 1);
+
+		/* If there are any cores powered at this bit or above (that
+		 * haven't previously been processed) then we need this core on
+		 */
+		if (cores_powered & mask)
+			desired |= bit_mask;
+
+		/* Remove bits from cores_powered and present */
+		cores_powered &= ~mask;
+		present &= ~bit_mask;
+	}
+
+	/* Power up the required L2(s) for the tiler */
+	if (tilers_powered)
+		desired |= 1;
+
+	return desired;
+}
+
+KBASE_EXPORT_TEST_API(get_desired_cache_status);
+
+#ifdef CONFIG_MALI_CORESTACK
+u64 kbase_pm_core_stack_mask(u64 cores)
+{
+	u64 stack_mask = 0;
+	size_t const MAX_CORE_ID = 31;
+	size_t const NUM_CORES_PER_STACK = 4;
+	size_t i;
+
+	for (i = 0; i <= MAX_CORE_ID; ++i) {
+		if (test_bit(i, (unsigned long *)&cores)) {
+			/* Every core which ID >= 16 is filled to stacks 4-7
+			 * instead of 0-3 */
+			size_t const stack_num = (i >= 16) ?
+				(i % NUM_CORES_PER_STACK) + 4 :
+				(i % NUM_CORES_PER_STACK);
+			set_bit(stack_num, (unsigned long *)&stack_mask);
+		}
+	}
+
+	return stack_mask;
+}
+#endif /* CONFIG_MALI_CORESTACK */
+
+bool
+MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
+{
+	bool cores_are_available = false;
+	bool in_desired_state = true;
+	u64 desired_l2_state;
+#ifdef CONFIG_MALI_CORESTACK
+	u64 desired_stack_state;
+	u64 stacks_powered;
+#endif /* CONFIG_MALI_CORESTACK */
+	u64 cores_powered;
+	u64 tilers_powered;
+	u64 tiler_available_bitmap;
+	u64 tiler_transitioning_bitmap;
+	u64 shader_available_bitmap;
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+	u64 l2_available_bitmap;
+	u64 prev_l2_available_bitmap;
+	u64 l2_inuse_bitmap;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock(&kbdev->pm.backend.gpu_powered_lock);
+	if (kbdev->pm.backend.gpu_powered == false) {
+		spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+		if (kbdev->pm.backend.desired_shader_state == 0 &&
+				kbdev->pm.backend.desired_tiler_state == 0)
+			return true;
+		return false;
+	}
+
+	/* If any cores are already powered then, we must keep the caches on */
+	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+	cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+	cores_powered |= kbdev->pm.backend.desired_shader_state;
+
+#ifdef CONFIG_MALI_CORESTACK
+	/* Work out which core stacks want to be powered */
+	desired_stack_state = kbase_pm_core_stack_mask(cores_powered);
+	stacks_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK) |
+		desired_stack_state;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	/* Work out which tilers want to be powered */
+	tiler_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_TILER);
+	tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+	tilers_powered |= kbdev->pm.backend.desired_tiler_state;
+
+	/* If there are l2 cache users registered, keep all l2s powered even if
+	 * all other cores are off. */
+	if (kbdev->l2_users_count > 0)
+		cores_powered |= kbdev->gpu_props.props.raw_props.l2_present;
+
+	desired_l2_state = get_desired_cache_status(
+			kbdev->gpu_props.props.raw_props.l2_present,
+			cores_powered, tilers_powered);
+
+	l2_inuse_bitmap = get_desired_cache_status(
+			kbdev->gpu_props.props.raw_props.l2_present,
+			cores_powered | shader_transitioning_bitmap,
+			tilers_powered | tiler_transitioning_bitmap);
+
+#ifdef CONFIG_MALI_CORESTACK
+	if (stacks_powered)
+		desired_l2_state |= 1;
+#endif /* CONFIG_MALI_CORESTACK */
+
+	/* If any l2 cache is on, then enable l2 #0, for use by job manager */
+	if (0 != desired_l2_state)
+		desired_l2_state |= 1;
+
+	prev_l2_available_bitmap = kbdev->l2_available_bitmap;
+	in_desired_state &= kbase_pm_transition_core_type(kbdev,
+			KBASE_PM_CORE_L2, desired_l2_state, l2_inuse_bitmap,
+			&l2_available_bitmap,
+			&kbdev->pm.backend.powering_on_l2_state);
+
+	kbdev->l2_available_bitmap = l2_available_bitmap;
+
+
+#ifdef CONFIG_MALI_CORESTACK
+	if (in_desired_state) {
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_STACK, desired_stack_state, 0,
+				&kbdev->stack_available_bitmap,
+				&kbdev->pm.backend.powering_on_stack_state);
+	}
+#endif /* CONFIG_MALI_CORESTACK */
+
+	if (in_desired_state) {
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_TILER,
+				kbdev->pm.backend.desired_tiler_state,
+				0, &tiler_available_bitmap,
+				&kbdev->pm.backend.powering_on_tiler_state);
+		in_desired_state &= kbase_pm_transition_core_type(kbdev,
+				KBASE_PM_CORE_SHADER,
+				kbdev->pm.backend.desired_shader_state,
+				0, &shader_available_bitmap,
+				&kbdev->pm.backend.powering_on_shader_state);
+
+		if (kbdev->shader_available_bitmap != shader_available_bitmap)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u,
+						(u32) shader_available_bitmap);
+
+		kbdev->shader_available_bitmap = shader_available_bitmap;
+
+		if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u,
+						(u32) tiler_available_bitmap);
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+
+	} else if ((l2_available_bitmap &
+			kbdev->gpu_props.props.raw_props.tiler_present) !=
+			kbdev->gpu_props.props.raw_props.tiler_present) {
+		tiler_available_bitmap = 0;
+
+		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+	}
+
+	/* State updated for slow-path waiters */
+	kbdev->pm.backend.gpu_in_desired_state = in_desired_state;
+
+	shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+							KBASE_PM_CORE_SHADER);
+
+	/* Determine whether the cores are now available (even if the set of
+	 * available cores is empty). Note that they can be available even if
+	 * we've not finished transitioning to the desired state */
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state) {
+		cores_are_available = true;
+
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u,
+				(u32)(kbdev->shader_available_bitmap &
+				kbdev->pm.backend.desired_shader_state));
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
+				(u32)(kbdev->tiler_available_bitmap &
+				kbdev->pm.backend.desired_tiler_state));
+	}
+
+	if (in_desired_state) {
+		KBASE_DEBUG_ASSERT(cores_are_available);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_L2,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_L2));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_TILER));
+#ifdef CONFIG_MALI_CORESTACK
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_STACK,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_STACK));
+#endif /* CONFIG_MALI_CORESTACK */
+#endif
+
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_L2,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_L2));
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_SHADER,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_SHADER));
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_TILER,
+				kbase_pm_get_ready_cores(
+					kbdev,
+					KBASE_PM_CORE_TILER));
+#ifdef CONFIG_MALI_CORESTACK
+		KBASE_TLSTREAM_AUX_PM_STATE(
+				KBASE_PM_CORE_STACK,
+				kbase_pm_get_ready_cores(
+					kbdev,
+					KBASE_PM_CORE_STACK));
+#endif /* CONFIG_MALI_CORESTACK */
+
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
+				kbdev->pm.backend.gpu_in_desired_state,
+				(u32)kbdev->pm.backend.desired_shader_state);
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
+				(u32)kbdev->pm.backend.desired_tiler_state);
+
+		/* Wake slow-path waiters. Job scheduler does not use this. */
+		KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
+
+		wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
+	}
+
+	spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+
+	kbdev->shader_ready_bitmap = shader_ready_bitmap;
+	kbdev->shader_transitioning_bitmap = shader_transitioning_bitmap;
+
+	/* The core availability policy is not allowed to keep core group 0
+	 * turned off (unless it was changing the l2 power state) */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask) &&
+		(prev_l2_available_bitmap == desired_l2_state) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+		BUG();
+
+	/* The core availability policy is allowed to keep core group 1 off,
+	 * but all jobs specifically targeting CG1 must fail */
+	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask) &&
+		!(kbase_pm_ca_get_core_mask(kbdev) &
+		kbdev->gpu_props.props.coherency_info.group[1].core_mask))
+		kbdev->pm.backend.cg1_disabled = true;
+	else
+		kbdev->pm.backend.cg1_disabled = false;
+
+	return cores_are_available;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock);
+
+/* Timeout for kbase_pm_check_transitions_sync when wait_event_killable has
+ * aborted due to a fatal signal. If the time spent waiting has exceeded this
+ * threshold then there is most likely a hardware issue. */
+#define PM_TIMEOUT (5*HZ) /* 5s */
+
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long timeout;
+	bool cores_are_available;
+	int ret;
+
+	/* Force the transition to be checked and reported - the cores may be
+	 * 'available' (for job submission) but not fully powered up. */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	timeout = jiffies + PM_TIMEOUT;
+
+	/* Wait for cores */
+	ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait,
+			kbdev->pm.backend.gpu_in_desired_state);
+
+	if (ret < 0 && time_after(jiffies, timeout)) {
+		dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
+		dev_err(kbdev->dev, "Desired state :\n");
+		dev_err(kbdev->dev, "\tShader=%016llx\n",
+				kbdev->pm.backend.desired_shader_state);
+		dev_err(kbdev->dev, "\tTiler =%016llx\n",
+				kbdev->pm.backend.desired_tiler_state);
+		dev_err(kbdev->dev, "Current state :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_HI)),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_READY_LO)));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_HI)),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_READY_LO)));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_HI)),
+				kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(L2_READY_LO)));
+		dev_err(kbdev->dev, "Cores transitioning :\n");
+		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_HI)),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						SHADER_PWRTRANS_LO)));
+		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_HI)),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						TILER_PWRTRANS_LO)));
+		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_HI)),
+				kbase_reg_read(kbdev, GPU_CONTROL_REG(
+						L2_PWRTRANS_LO)));
+#if KBASE_GPU_RESET_EN
+		dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
+		if (kbase_prepare_to_reset_gpu(kbdev))
+			kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync);
+
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Clear all interrupts,
+	 * and unmask them all.
+	 */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
+
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	/*
+	 * Mask all interrupts,
+	 * and clear them all.
+	 */
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0);
+	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF);
+
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0);
+	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF);
+}
+
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_disable_interrupts_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
+
+
+/*
+ * pmu layout:
+ * 0x0000: PMU TAG (RO) (0xCAFECAFE)
+ * 0x0004: PMU VERSION ID (RO) (0x00000000)
+ * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE)
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
+{
+	bool reset_required = is_resume;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_powered) {
+		/* Already turned on */
+		if (kbdev->poweroff_pending)
+			kbase_pm_enable_interrupts(kbdev);
+		kbdev->poweroff_pending = false;
+		KBASE_DEBUG_ASSERT(!is_resume);
+		return;
+	}
+
+	kbdev->poweroff_pending = false;
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u);
+
+	if (is_resume && kbdev->pm.backend.callback_power_resume) {
+		kbdev->pm.backend.callback_power_resume(kbdev);
+		return;
+	} else if (kbdev->pm.backend.callback_power_on) {
+		kbdev->pm.backend.callback_power_on(kbdev);
+		/* If your platform properly keeps the GPU state you may use the
+		 * return value of the callback_power_on function to
+		 * conditionally reset the GPU on power up. Currently we are
+		 * conservative and always reset the GPU. */
+		reset_required = true;
+	}
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+	kbdev->pm.backend.gpu_powered = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (reset_required) {
+		/* GPU state was lost, reset GPU to ensure it is in a
+		 * consistent state */
+		kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS);
+	}
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_ctx_sched_restore_all_as(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	/* Lastly, enable the interrupts */
+	kbase_pm_enable_interrupts(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
+
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* ASSERT that the cores should now be unavailable. No lock needed. */
+	KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u);
+
+	kbdev->poweroff_pending = true;
+
+	if (!kbdev->pm.backend.gpu_powered) {
+		/* Already turned off */
+		if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+			kbdev->pm.backend.callback_power_suspend(kbdev);
+		return true;
+	}
+
+	KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u);
+
+	/* Disable interrupts. This also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure that any IRQ handlers have finished */
+	kbase_synchronize_irqs(kbdev);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (atomic_read(&kbdev->faults_pending)) {
+		/* Page/bus faults are still being processed. The GPU can not
+		 * be powered off until they have completed */
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+									flags);
+		return false;
+	}
+
+	kbase_pm_cache_snoop_disable(kbdev);
+
+	/* The GPU power may be turned off from this point */
+	kbdev->pm.backend.gpu_powered = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+	if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+		kbdev->pm.backend.callback_power_suspend(kbdev);
+	else if (kbdev->pm.backend.callback_power_off)
+		kbdev->pm.backend.callback_power_off(kbdev);
+	return true;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_off);
+
+struct kbasep_reset_timeout_data {
+	struct hrtimer timer;
+	bool timed_out;
+	struct kbase_device *kbdev;
+};
+
+void kbase_pm_reset_done(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	kbdev->pm.backend.reset_done = true;
+	wake_up(&kbdev->pm.backend.reset_done_wait);
+}
+
+/**
+ * kbase_pm_wait_for_reset - Wait for a reset to happen
+ *
+ * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state.
+ *
+ * @kbdev: Kbase device
+ */
+static void kbase_pm_wait_for_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	wait_event(kbdev->pm.backend.reset_done_wait,
+						(kbdev->pm.backend.reset_done));
+	kbdev->pm.backend.reset_done = false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_reset_done);
+
+static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
+{
+	struct kbasep_reset_timeout_data *rtdata =
+		container_of(timer, struct kbasep_reset_timeout_data, timer);
+
+	rtdata->timed_out = 1;
+
+	/* Set the wait queue to wake up kbase_pm_init_hw even though the reset
+	 * hasn't completed */
+	kbase_pm_reset_done(rtdata->kbdev);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
+{
+	struct device_node *np = kbdev->dev->of_node;
+	u32 jm_values[4];
+	const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	const u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >>
+		GPU_ID_VERSION_MAJOR_SHIFT;
+
+	kbdev->hw_quirks_sc = 0;
+
+	/* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443.
+	 * and needed due to MIDGLES-3539. See PRLAM-11035 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) ||
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035))
+		kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE;
+
+	/* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327.
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327))
+		kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD;
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	/* Enable alternative hardware counter selection if configured. */
+	if (!GPU_ID_IS_NEW_FORMAT(prod_id))
+		kbdev->hw_quirks_sc |= SC_ALT_COUNTERS;
+#endif
+
+	/* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
+		kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
+
+	if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) {
+		if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */
+			kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE;
+		else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */
+			kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES;
+	}
+
+	if (!kbdev->hw_quirks_sc)
+		kbdev->hw_quirks_sc = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_CONFIG));
+
+	kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(TILER_CONFIG));
+
+	/* Set tiler clock gate override if required */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
+		kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE;
+
+	/* Limit the GPU bus bandwidth if the platform needs this. */
+	kbdev->hw_quirks_mmu = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(L2_MMU_CONFIG));
+
+
+	/* Limit read & write ID width for AXI */
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG)) {
+		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS);
+		kbdev->hw_quirks_mmu |= (DEFAULT_3BIT_ARID_LIMIT & 0x7) <<
+				L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS_SHIFT;
+
+		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES);
+		kbdev->hw_quirks_mmu |= (DEFAULT_3BIT_AWID_LIMIT & 0x7) <<
+				L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES_SHIFT;
+	} else {
+		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS);
+		kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT;
+
+		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES);
+		kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) <<
+				L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT;
+	}
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Allow memory configuration disparity to be ignored, we
+		 * optimize the use of shared memory and thus we expect
+		 * some disparity in the memory configuration */
+		kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
+	}
+
+	kbdev->hw_quirks_jm = 0;
+	/* Only for T86x/T88x-based products after r2p0 */
+	if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) {
+
+		if (of_property_read_u32_array(np,
+					"jm_config",
+					&jm_values[0],
+					ARRAY_SIZE(jm_values))) {
+			/* Entry not in device tree, use defaults  */
+			jm_values[0] = 0;
+			jm_values[1] = 0;
+			jm_values[2] = 0;
+			jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT;
+		}
+
+		/* Limit throttle limit to 6 bits*/
+		if (jm_values[3] > JM_MAX_JOB_THROTTLE_LIMIT) {
+			dev_dbg(kbdev->dev, "JOB_THROTTLE_LIMIT supplied in device tree is too large. Limiting to MAX (63).");
+			jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT;
+		}
+
+		/* Aggregate to one integer. */
+		kbdev->hw_quirks_jm |= (jm_values[0] ?
+				JM_TIMESTAMP_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[1] ?
+				JM_CLOCK_GATE_OVERRIDE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[2] ?
+				JM_JOB_THROTTLE_ENABLE : 0);
+		kbdev->hw_quirks_jm |= (jm_values[3] <<
+				JM_JOB_THROTTLE_LIMIT_SHIFT);
+
+	} else if (GPU_ID_IS_NEW_FORMAT(prod_id) &&
+			   (GPU_ID2_MODEL_MATCH_VALUE(prod_id) ==
+					   GPU_ID2_PRODUCT_TMIX)) {
+		/* Only for tMIx */
+		u32 coherency_features;
+
+		coherency_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(COHERENCY_FEATURES));
+
+		/* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly
+		 * documented for tMIx so force correct value here.
+		 */
+		if (coherency_features ==
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE)) {
+			kbdev->hw_quirks_jm |=
+				(COHERENCY_ACE_LITE | COHERENCY_ACE) <<
+				JM_FORCE_COHERENCY_FEATURES_SHIFT;
+		}
+	}
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_TLS_HASHING))
+		kbdev->hw_quirks_sc |= SC_TLS_HASH_ENABLE;
+
+	if (!kbdev->hw_quirks_jm)
+		kbdev->hw_quirks_jm = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JM_CONFIG));
+
+#ifdef CONFIG_MALI_CORESTACK
+#define MANUAL_POWER_CONTROL ((u32)(1 << 8))
+	kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL;
+#endif /* CONFIG_MALI_CORESTACK */
+}
+
+static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
+{
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG),
+			kbdev->hw_quirks_sc);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG),
+			kbdev->hw_quirks_tiler);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG),
+			kbdev->hw_quirks_mmu);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG),
+			kbdev->hw_quirks_jm);
+
+}
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
+{
+	if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) &&
+		!kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_enable_smc != 0)
+			kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0);
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n");
+		kbdev->cci_snoop_enabled = true;
+	}
+}
+
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev)
+{
+	if (kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_disable_smc != 0) {
+			mali_cci_flush_l2(kbdev);
+			kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0);
+		}
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n");
+		kbdev->cci_snoop_enabled = false;
+	}
+}
+
+static int kbase_pm_do_reset(struct kbase_device *kbdev)
+{
+	struct kbasep_reset_timeout_data rtdata;
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+
+	KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev);
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_SOFT_RESET);
+
+	/* Unmask the reset complete interrupt only */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED);
+
+	/* Initialize a structure for tracking the status of the reset */
+	rtdata.kbdev = kbdev;
+	rtdata.timed_out = 0;
+
+	/* Create a timer to use as a timeout on the reset */
+	hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rtdata.timer.function = kbasep_reset_timeout;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return 0;
+	}
+
+	/* No interrupt has been received - check if the RAWSTAT register says
+	 * the reset has completed */
+	if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) &
+							RESET_COMPLETED) {
+		/* The interrupt is set in the RAWSTAT; this suggests that the
+		 * interrupts are not getting to the CPU */
+		dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
+		/* If interrupts aren't working we can't continue. */
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return -EINVAL;
+	}
+
+	/* The GPU doesn't seem to be responding to the reset so try a hard
+	 * reset */
+	dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n",
+								RESET_TIMEOUT);
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_HARD_RESET);
+
+	/* Restart the timer to wait for the hard reset to complete */
+	rtdata.timed_out = 0;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		return 0;
+	}
+
+	destroy_hrtimer_on_stack(&rtdata.timer);
+
+	dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n",
+								RESET_TIMEOUT);
+
+	return -EINVAL;
+}
+
+static int kbasep_protected_mode_enable(struct protected_mode_device *pdev)
+{
+	struct kbase_device *kbdev = pdev->data;
+
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+		GPU_COMMAND_SET_PROTECTED_MODE);
+	return 0;
+}
+
+static int kbasep_protected_mode_disable(struct protected_mode_device *pdev)
+{
+	struct kbase_device *kbdev = pdev->data;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	return kbase_pm_do_reset(kbdev);
+}
+
+struct protected_mode_ops kbase_native_protected_ops = {
+	.protected_mode_enable = kbasep_protected_mode_enable,
+	.protected_mode_disable = kbasep_protected_mode_disable
+};
+
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
+{
+	unsigned long irq_flags;
+	int err;
+	bool resume_vinstr = false;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Ensure the clock is on before attempting to access the hardware */
+	if (!kbdev->pm.backend.gpu_powered) {
+		if (kbdev->pm.backend.callback_power_on)
+			kbdev->pm.backend.callback_power_on(kbdev);
+
+		spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+		kbdev->pm.backend.gpu_powered = true;
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+	}
+
+	/* Ensure interrupts are off to begin with, this also clears any
+	 * outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure cache snoops are disabled before reset. */
+	kbase_pm_cache_snoop_disable(kbdev);
+	/* Prepare for the soft-reset */
+	kbdev->pm.backend.reset_done = false;
+
+	/* The cores should be made unavailable due to the reset */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	if (kbdev->shader_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u, (u32)0u);
+	if (kbdev->tiler_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u, (u32)0u);
+	kbdev->shader_available_bitmap = 0u;
+	kbdev->tiler_available_bitmap = 0u;
+	kbdev->l2_available_bitmap = 0u;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	/* Soft reset the GPU */
+	if (kbdev->protected_mode_support)
+		err = kbdev->protected_ops->protected_mode_disable(
+				kbdev->protected_dev);
+	else
+		err = kbase_pm_do_reset(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	if (kbdev->protected_mode)
+		resume_vinstr = true;
+	kbdev->protected_mode = false;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+	if (err)
+		goto exit;
+
+	if (flags & PM_HW_ISSUES_DETECT)
+		kbase_pm_hw_issues_detect(kbdev);
+
+	kbase_pm_hw_issues_apply(kbdev);
+	kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency);
+
+	/* Sanity check protected mode was left after reset */
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		u32 gpu_status = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS));
+
+		WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE);
+	}
+
+	/* If cycle counter was in use re-enable it, enable_irqs will only be
+	 * false when called from kbase_pm_powerup */
+	if (kbdev->pm.backend.gpu_cycle_counter_requests &&
+						(flags & PM_ENABLE_IRQS)) {
+		/* enable interrupts as the L2 may have to be powered on */
+		kbase_pm_enable_interrupts(kbdev);
+		kbase_pm_request_l2_caches(kbdev);
+
+		/* Re-enable the counters if we need to */
+		spin_lock_irqsave(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+		if (kbdev->pm.backend.gpu_cycle_counter_requests)
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START);
+		spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+		kbase_pm_release_l2_caches(kbdev);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+
+		kbase_pm_disable_interrupts(kbdev);
+	}
+
+	if (flags & PM_ENABLE_IRQS)
+		kbase_pm_enable_interrupts(kbdev);
+
+exit:
+	/* If GPU is leaving protected mode resume vinstr operation. */
+	if (kbdev->vinstr_ctx && resume_vinstr)
+		kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	return err;
+}
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters
+ *
+ * Increase the count of cycle counter users and turn the cycle counters on if
+ * they were previously off
+ *
+ * This function is designed to be called by
+ * kbase_pm_request_gpu_cycle_counter() or
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on() only
+ *
+ * When this function is called the l2 cache must be on and the l2 cache users
+ * count must have been incremented by a call to (
+ * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_on() )
+ *
+ * @kbdev:     The kbase device structure of the device
+ */
+static void
+kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	++kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+}
+
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter);
+
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches_l2_is_on(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on);
+
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0);
+
+	--kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_STOP);
+
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	kbase_pm_release_l2_caches(kbdev);
+}
+
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
new file mode 100644
index 0000000000000..0d3599ae5da81
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -0,0 +1,580 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Power management API definitions used internally by GPU backend
+ */
+
+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
+#define _KBASE_BACKEND_PM_INTERNAL_H_
+
+#include <mali_kbase_hwaccess_pm.h>
+
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_pm_policy.h"
+
+
+/**
+ * kbase_pm_dev_idle - The GPU is idle.
+ *
+ * The OS may choose to turn off idle devices
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_idle(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_dev_activate - The GPU is active.
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_activate(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_get_present_cores - Get details of the cores that are present in
+ *                              the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) present in the GPU device and also a count of
+ * the number of cores.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid
+ *         pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of cores present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_active_cores - Get details of the cores that are currently
+ *                             active in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are actively processing work (i.e.
+ * turned on *and* busy).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of active cores
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_trans_cores - Get details of the cores that are currently
+ *                            transitioning between power states.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are currently transitioning between
+ * power states.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of transitioning cores
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_get_ready_cores - Get details of the cores that are currently
+ *                            powered and ready for jobs.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are powered and ready for jobs (they may
+ * or may not be currently executing jobs).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @type:  The type of core (see the enum kbase_pm_core_type enumeration)
+ *
+ * Return: The bit mask of ready cores
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * kbase_pm_clock_on - Turn the clock for the device on, and enable device
+ *                     interrupts.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU on.
+ * It should be modified during integration to perform the necessary actions to
+ * ensure that the GPU is fully powered and clocked.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if clock on due to resume after suspend, false otherwise
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the
+ *                      device off.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU
+ * off. It should be modified during integration to perform the necessary
+ * actions to turn the clock off (if this is possible in the integration).
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if clock off due to suspend, false otherwise
+ *
+ * Return: true  if clock was turned off, or
+ *         false if clock can not be turned off due to pending page/bus fault
+ *               workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend);
+
+/**
+ * kbase_pm_enable_interrupts - Enable interrupts on the device.
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts - Disable interrupts on the device.
+ *
+ * This prevents delivery of Power Management interrupts to the CPU so that
+ * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
+ * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
+ *
+ * Interrupts are also disabled after a call to kbase_pm_clock_off().
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts()
+ *                                      that does not take the hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_init_hw - Initialize the hardware.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags specifying the type of PM init
+ *
+ * This function checks the GPU ID register to ensure that the GPU is supported
+ * by the driver and performs a reset on the device so that it is in a known
+ * state before the device is used.
+ *
+ * Return: 0 if the device is supported and successfully reset.
+ */
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * kbase_pm_reset_done - The GPU has been reset successfully.
+ *
+ * This function must be called by the GPU interrupt handler when the
+ * RESET_COMPLETED bit is set. It signals to the power management initialization
+ * code that the GPU has been successfully reset.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_reset_done(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_pm_check_transitions_nolock - Check if there are any power transitions
+ *                                     to make, and if so start them.
+ *
+ * This function will check the desired_xx_state members of
+ * struct kbase_pm_device_data and the actual status of the hardware to see if
+ * any power transitions can be made at this time to make the hardware state
+ * closer to the state desired by the power policy.
+ *
+ * The return value can be used to check whether all the desired cores are
+ * available, and so whether it's worth submitting a job (e.g. from a Power
+ * Management IRQ).
+ *
+ * Note that this still returns true when desired_xx_state has no
+ * cores. That is: of the no cores desired, none were *un*available. In
+ * this case, the caller may still need to try submitting jobs. This is because
+ * the Core Availability Policy might have taken us to an intermediate state
+ * where no cores are powered, before powering on more cores (e.g. for core
+ * rotation)
+ *
+ * The caller must hold kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return:      non-zero when all desired cores are available. That is,
+ *              it's worthwhile for the caller to submit a job.
+ *              false otherwise
+ */
+bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_check_transitions_sync - Synchronous and locking variant of
+ *                                   kbase_pm_check_transitions_nolock()
+ *
+ * On returning, the desired state at the time of the call will have been met.
+ *
+ * There is nothing to stop the core being switched off by calls to
+ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
+ * caller must have already made a call to
+ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ *
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
+ *
+ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
+ * kbase_device.pm.power_change_lock, because this function will take that
+ * lock itself.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
+ *                                      where the caller must hold
+ *                                      kbase_device.pm.power_change_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state - Update the desired state of shader cores from
+ *                               the Power Policy, and begin any power
+ *                               transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It will
+ * then begin power transitions to make the hardware acheive the desired shader
+ * core state.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
+ *                                     the GPU and/or shader cores.
+ *
+ * This should be called by any functions which directly power off the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_init_core_use_bitmaps - Initialise data tracking the required
+ *                                   and used cores.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_init - Initialize the metrics gathering framework.
+ *
+ * This must be called before other metric gathering APIs are called.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 on success, error code on error
+ */
+int kbasep_pm_metrics_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_term - Terminate the metrics gathering framework.
+ *
+ * This must be called when metric gathering is no longer required. It is an
+ * error to call any metrics gathering function (other than
+ * kbasep_pm_metrics_init()) after calling this function.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_metrics_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_report_vsync - Function to be called by the frame buffer driver to
+ *                         update the vsync metric.
+ *
+ * This function should be called by the frame buffer driver to update whether
+ * the system is hitting the vsync target or not. buffer_updated should be true
+ * if the vsync corresponded with a new frame being displayed, otherwise it
+ * should be false. This function does not need to be called every vsync, but
+ * only when the value of @buffer_updated differs from a previous call.
+ *
+ * @kbdev:          The kbase device structure for the device (must be a
+ *                  valid pointer)
+ * @buffer_updated: True if the buffer has been updated on this VSync,
+ *                  false otherwise
+ */
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
+
+/**
+ * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change
+ *                            the clock speed of the GPU.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function should be called regularly by the DVFS system to check whether
+ * the clock speed of the GPU needs updating.
+ */
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is
+ *                                      needed
+ *
+ * If the caller is the first caller then the GPU cycle counters will be enabled
+ * along with the l2 cache
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called).
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is
+ *                                               needed (l2 cache already on)
+ *
+ * This is a version of the above function
+ * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the
+ * l2 cache is known to be on and assured to be on until the subsequent call of
+ * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does
+ * not sleep and can be called from atomic functions.
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * kbase_pm_context_active() must have been called) and the l2 cache must be
+ * powered on.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
+ *                                      longer in use
+ *
+ * If the caller is the last caller then the GPU cycle counters will be
+ * disabled. A request must have been made before a call to this.
+ *
+ * Caller must not hold the hwaccess_lock, as it will be taken in this function.
+ * If the caller is already holding this lock then
+ * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter()
+ *                                             that does not take hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to
+ *                                       complete
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_runtime_init - Initialize runtime-pm for Mali GPU platform device
+ *
+ * Setup the power management callbacks and initialize/enable the runtime-pm
+ * for the Mali GPU platform device, using the callback function. This must be
+ * called before the kbase_pm_register_access_enable() function.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+int kbase_pm_runtime_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_runtime_term - Disable runtime-pm for Mali GPU platform device
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_runtime_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_enable - Enable access to GPU registers
+ *
+ * Enables access to the GPU registers before power management has powered up
+ * the GPU with kbase_pm_powerup().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn on power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_disable - Disable early register access
+ *
+ * Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn off power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
+ * function */
+
+/**
+ * kbase_pm_metrics_is_active - Check if the power management metrics
+ *                              collection is active.
+ *
+ * Note that this returns if the power management metrics collection was
+ * active at the time of calling, it is possible that after the call the metrics
+ * collection enable may have changed state.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * Return: true if metrics collection was active else false.
+ */
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested.
+ *
+ * @kbdev:     The kbase device structure for the device (must be a valid
+ *             pointer)
+ * @is_resume: true if power on due to resume after suspend,
+ *             false otherwise
+ */
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been
+ *                        requested.
+ *
+ * @kbdev:      The kbase device structure for the device (must be a valid
+ *              pointer)
+ * @is_suspend: true if power off due to suspend,
+ *              false otherwise
+ */
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
+
+#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
+			       struct kbasep_pm_metrics *last,
+			       struct kbasep_pm_metrics *diff);
+#endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/**
+ * kbase_platform_dvfs_event - Report utilisation to DVFS code
+ *
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
+ * @kbdev:         The kbase device structure for the device (must be a
+ *                 valid pointer)
+ * @utilisation:   The current calculated utilisation by the metrics system.
+ * @util_gl_share: The current calculated gl share of utilisation.
+ * @util_cl_share: The current calculated cl share of utilisation per core
+ *                 group.
+ * Return:         Returns 0 on failure and non zero on success.
+ */
+
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2]);
+#endif
+
+void kbase_pm_power_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_metrics_update - Inform the metrics system that an atom is either
+ *                           about to be run or has just completed.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @now:   Pointer to the timestamp of the change, or NULL to use current time
+ *
+ * Caller must hold hwaccess_lock
+ */
+void kbase_pm_metrics_update(struct kbase_device *kbdev,
+				ktime_t *now);
+
+/**
+ * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called after L2 power up.
+ */
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:	Device pointer
+ *
+ * This function should be called before L2 power off.
+ */
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
+
+#ifdef CONFIG_MALI_DEVFREQ
+/**
+ * kbase_devfreq_set_core_mask - Set devfreq core mask
+ * @kbdev:     Device pointer
+ * @core_mask: New core mask
+ *
+ * This function is used by devfreq to change the available core mask as
+ * required by Dynamic Core Scaling.
+ */
+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask);
+#endif
+
+#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
new file mode 100644
index 0000000000000..6b9b6862cc9b6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
@@ -0,0 +1,295 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Metrics for power management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_jm_rb.h>
+#include <backend/gpu/mali_kbase_pm_defs.h>
+
+/* When VSync is being hit aim for utilisation between 70-90% */
+#define KBASE_PM_VSYNC_MIN_UTILISATION          70
+#define KBASE_PM_VSYNC_MAX_UTILISATION          90
+/* Otherwise aim for 10-40% */
+#define KBASE_PM_NO_VSYNC_MIN_UTILISATION       10
+#define KBASE_PM_NO_VSYNC_MAX_UTILISATION       40
+
+/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
+ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
+ * under 11s. Exceeding this will cause overflow */
+#define KBASE_PM_TIME_SHIFT			8
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
+{
+	unsigned long flags;
+	struct kbasep_pm_metrics_state *metrics;
+
+	KBASE_DEBUG_ASSERT(timer != NULL);
+
+	metrics = container_of(timer, struct kbasep_pm_metrics_state, timer);
+	kbase_pm_get_dvfs_action(metrics->kbdev);
+
+	spin_lock_irqsave(&metrics->lock, flags);
+
+	if (metrics->timer_active)
+		hrtimer_start(timer,
+			HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+
+	spin_unlock_irqrestore(&metrics->lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+int kbasep_pm_metrics_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.metrics.kbdev = kbdev;
+
+	kbdev->pm.backend.metrics.time_period_start = ktime_get();
+	kbdev->pm.backend.metrics.gpu_active = false;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+
+	kbdev->pm.backend.metrics.values.time_busy = 0;
+	kbdev->pm.backend.metrics.values.time_idle = 0;
+	kbdev->pm.backend.metrics.values.busy_cl[0] = 0;
+	kbdev->pm.backend.metrics.values.busy_cl[1] = 0;
+	kbdev->pm.backend.metrics.values.busy_gl = 0;
+
+	spin_lock_init(&kbdev->pm.backend.metrics.lock);
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	kbdev->pm.backend.metrics.timer_active = true;
+	hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->pm.backend.metrics.timer.function = dvfs_callback;
+
+	hrtimer_start(&kbdev->pm.backend.metrics.timer,
+			HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
+
+void kbasep_pm_metrics_term(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbdev->pm.backend.metrics.timer_active = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
+								ktime_t now)
+{
+	ktime_t diff;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
+	if (ktime_to_ns(diff) < 0)
+		return;
+
+	if (kbdev->pm.backend.metrics.gpu_active) {
+		u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+
+		kbdev->pm.backend.metrics.values.time_busy += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[0])
+			kbdev->pm.backend.metrics.values.busy_cl[0] += ns_time;
+		if (kbdev->pm.backend.metrics.active_cl_ctx[1])
+			kbdev->pm.backend.metrics.values.busy_cl[1] += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[0])
+			kbdev->pm.backend.metrics.values.busy_gl += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[1])
+			kbdev->pm.backend.metrics.values.busy_gl += ns_time;
+	} else {
+		kbdev->pm.backend.metrics.values.time_idle += (u32) (ktime_to_ns(diff)
+							>> KBASE_PM_TIME_SHIFT);
+	}
+
+	kbdev->pm.backend.metrics.time_period_start = now;
+}
+
+#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
+			       struct kbasep_pm_metrics *last,
+			       struct kbasep_pm_metrics *diff)
+{
+	struct kbasep_pm_metrics *cur = &kbdev->pm.backend.metrics.values;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get());
+
+	memset(diff, 0, sizeof(*diff));
+	diff->time_busy = cur->time_busy - last->time_busy;
+	diff->time_idle = cur->time_idle - last->time_idle;
+	diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0];
+	diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1];
+	diff->busy_gl = cur->busy_gl - last->busy_gl;
+
+	*last = *cur;
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics);
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
+{
+	int utilisation, util_gl_share;
+	int util_cl_share[2];
+	int busy;
+	struct kbasep_pm_metrics *diff;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	diff = &kbdev->pm.backend.metrics.dvfs_diff;
+
+	kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, diff);
+
+	utilisation = (100 * diff->time_busy) /
+			max(diff->time_busy + diff->time_idle, 1u);
+
+	busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u);
+	util_gl_share = (100 * diff->busy_gl) / busy;
+	util_cl_share[0] = (100 * diff->busy_cl[0]) / busy;
+	util_cl_share[1] = (100 * diff->busy_cl[1]) / busy;
+
+	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share);
+}
+
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
+{
+	bool isactive;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	isactive = kbdev->pm.backend.metrics.timer_active;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+	return isactive;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
+
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+/**
+ * kbase_pm_metrics_active_calc - Update PM active counts based on currently
+ *                                running atoms
+ * @kbdev: Device pointer
+ *
+ * The caller must hold kbdev->pm.backend.metrics.lock
+ */
+static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.gpu_active = false;
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+
+		/* Head atom may have just completed, so if it isn't running
+		 * then try the next atom */
+		if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
+			katom = kbase_gpu_inspect(kbdev, js, 1);
+
+		if (katom && katom->gpu_rb_state ==
+				KBASE_ATOM_GPU_RB_SUBMITTED) {
+			if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+				int device_nr = (katom->core_req &
+					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
+						? katom->device_nr : 0;
+				if (!WARN_ON(device_nr >= 2))
+					kbdev->pm.backend.metrics.
+						active_cl_ctx[device_nr] = 1;
+			} else {
+				/* Slot 2 should not be running non-compute
+				 * atoms */
+				if (!WARN_ON(js >= 2))
+					kbdev->pm.backend.metrics.
+						active_gl_ctx[js] = 1;
+			}
+			kbdev->pm.backend.metrics.gpu_active = true;
+		}
+	}
+}
+
+/* called when job is submitted to or removed from a GPU slot */
+void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
+{
+	unsigned long flags;
+	ktime_t now;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	if (!timestamp) {
+		now = ktime_get();
+		timestamp = &now;
+	}
+
+	/* Track how long CL and/or GL jobs have been busy for */
+	kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);
+
+	kbase_pm_metrics_active_calc(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
new file mode 100644
index 0000000000000..6dd00a92864ca
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -0,0 +1,777 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Power policy API implementations
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_policy *const policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#else				/* CONFIG_MALI_NO_MALI */
+#if !PLATFORM_POWER_DOWN_ONLY
+	&kbase_pm_demand_policy_ops,
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+	&kbase_pm_coarse_demand_policy_ops,
+	&kbase_pm_always_on_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+#if !PLATFORM_POWER_DOWN_ONLY
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif /* !PLATFORM_POWER_DOWN_ONLY */
+#endif
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+/* The number of policies available in the system.
+ * This is derived from the number of functions listed in policy_get_functions.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+
+/* Function IDs for looking up Timeline Trace codes in
+ * kbase_pm_change_state_trace_code */
+enum kbase_pm_func_id {
+	KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+	KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+	/* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
+	 * expect to hit it nor tend to hit it very much anyway. We can detect
+	 * whether we need more instrumentation by a difference between
+	 * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */
+
+	/* Must be the last */
+	KBASE_PM_FUNC_ID_COUNT
+};
+
+
+/* State changes during request/unrequest/release-ing cores */
+enum {
+	KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
+	KBASE_PM_CHANGE_STATE_TILER  = (1u << 1),
+
+	/* These two must be last */
+	KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER |
+						KBASE_PM_CHANGE_STATE_SHADER),
+	KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
+};
+typedef u32 kbase_pm_change_state;
+
+/**
+ * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any
+ *                               requested shader cores
+ * @kbdev: Device pointer
+ */
+static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
+{
+	u64 prev_shader_state = kbdev->pm.backend.desired_shader_state;
+	u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->pm.backend.desired_shader_state &=
+			~kbdev->pm.backend.shader_poweroff_pending;
+	kbdev->pm.backend.desired_tiler_state &=
+			~kbdev->pm.backend.tiler_poweroff_pending;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+
+	if (prev_shader_state != kbdev->pm.backend.desired_shader_state ||
+			prev_tiler_state !=
+				kbdev->pm.backend.desired_tiler_state ||
+			kbdev->pm.backend.ca_in_transition) {
+		bool cores_are_available;
+
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+
+		/* Don't need 'cores_are_available',
+		 * because we don't return anything */
+		CSTD_UNUSED(cores_are_available);
+	}
+}
+
+static enum hrtimer_restart
+kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	kbdev = container_of(timer, struct kbase_device,
+						pm.backend.gpu_poweroff_timer);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* It is safe for this call to do nothing if the work item is already
+	 * queued. The worker function will read the must up-to-date state of
+	 * kbdev->pm.backend.gpu_poweroff_pending under lock.
+	 *
+	 * If a state change occurs while the worker function is processing,
+	 * this call will succeed as a work item can be requeued once it has
+	 * started processing.
+	 */
+	if (kbdev->pm.backend.gpu_poweroff_pending)
+		queue_work(kbdev->pm.backend.gpu_poweroff_wq,
+					&kbdev->pm.backend.gpu_poweroff_work);
+
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending_time--;
+
+		KBASE_DEBUG_ASSERT(
+				kbdev->pm.backend.shader_poweroff_pending_time
+									>= 0);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending_time)
+			kbasep_pm_do_poweroff_cores(kbdev);
+	}
+
+	if (kbdev->pm.backend.poweroff_timer_needed) {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
+
+		return HRTIMER_RESTART;
+	}
+
+	kbdev->pm.backend.poweroff_timer_running = false;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	bool do_poweroff = false;
+
+	kbdev = container_of(data, struct kbase_device,
+						pm.backend.gpu_poweroff_work);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	kbdev->pm.backend.gpu_poweroff_pending--;
+
+	if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Only power off the GPU if a request is still pending */
+	if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
+		do_poweroff = true;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (do_poweroff) {
+		kbdev->pm.backend.poweroff_timer_needed = false;
+		hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+		kbdev->pm.backend.poweroff_timer_running = false;
+
+		/* Power off the GPU */
+		kbase_pm_do_poweroff(kbdev, false);
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+int kbase_pm_policy_init(struct kbase_device *kbdev)
+{
+	struct workqueue_struct *wq;
+
+	wq = alloc_workqueue("kbase_pm_do_poweroff",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!wq)
+		return -ENOMEM;
+
+	kbdev->pm.backend.gpu_poweroff_wq = wq;
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
+			kbasep_pm_do_gpu_poweroff_wq);
+	hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
+			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	kbdev->pm.backend.gpu_poweroff_timer.function =
+			kbasep_pm_do_gpu_poweroff_callback;
+	kbdev->pm.backend.pm_current_policy = policy_list[0];
+	kbdev->pm.backend.pm_current_policy->init(kbdev);
+	kbdev->pm.gpu_poweroff_time =
+			HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
+	kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
+	kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU;
+
+	return 0;
+}
+
+void kbase_pm_policy_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.pm_current_policy->term(kbdev);
+	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq);
+}
+
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	kbdev->pm.backend.poweroff_timer_needed = false;
+	hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.poweroff_timer_running = false;
+
+	/* If wq is already running but is held off by pm.lock, make sure it has
+	 * no effect */
+	kbdev->pm.backend.gpu_poweroff_pending = 0;
+
+	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
+	kbdev->pm.backend.shader_poweroff_pending_time = 0;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_pm_update_active(struct kbase_device *kbdev)
+{
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	unsigned long flags;
+	bool active;
+
+	lockdep_assert_held(&pm->lock);
+
+	/* pm_current_policy will never be NULL while pm.lock is held */
+	KBASE_DEBUG_ASSERT(backend->pm_current_policy);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	active = backend->pm_current_policy->get_core_active(kbdev);
+	WARN((kbase_pm_is_active(kbdev) && !active),
+		"GPU is active but policy '%s' is indicating that it can be powered off",
+		kbdev->pm.backend.pm_current_policy->name);
+
+	if (active) {
+		if (backend->gpu_poweroff_pending) {
+			/* Cancel any pending power off request */
+			backend->gpu_poweroff_pending = 0;
+
+			/* If a request was pending then the GPU was still
+			 * powered, so no need to continue */
+			if (!kbdev->poweroff_pending) {
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				return;
+			}
+		}
+
+		if (!backend->poweroff_timer_running && !backend->gpu_powered &&
+				(pm->poweroff_gpu_ticks ||
+				pm->poweroff_shader_ticks)) {
+			backend->poweroff_timer_needed = true;
+			backend->poweroff_timer_running = true;
+			hrtimer_start(&backend->gpu_poweroff_timer,
+					pm->gpu_poweroff_time,
+					HRTIMER_MODE_REL);
+		}
+
+		/* Power on the GPU and any cores requested by the policy */
+		if (pm->backend.poweroff_wait_in_progress) {
+			KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+			pm->backend.poweron_required = true;
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			kbase_pm_do_poweron(kbdev, false);
+		}
+	} else {
+		/* It is an error for the power policy to power off the GPU
+		 * when there are contexts active */
+		KBASE_DEBUG_ASSERT(pm->active_count == 0);
+
+		if (backend->shader_poweroff_pending ||
+				backend->tiler_poweroff_pending) {
+			backend->shader_poweroff_pending = 0;
+			backend->tiler_poweroff_pending = 0;
+			backend->shader_poweroff_pending_time = 0;
+		}
+
+		/* Request power off */
+		if (pm->backend.gpu_powered) {
+			if (pm->poweroff_gpu_ticks) {
+				backend->gpu_poweroff_pending =
+						pm->poweroff_gpu_ticks;
+				backend->poweroff_timer_needed = true;
+				if (!backend->poweroff_timer_running) {
+					/* Start timer if not running (eg if
+					 * power policy has been changed from
+					 * always_on to something else). This
+					 * will ensure the GPU is actually
+					 * powered off */
+					backend->poweroff_timer_running
+							= true;
+					hrtimer_start(
+						&backend->gpu_poweroff_timer,
+						pm->gpu_poweroff_time,
+						HRTIMER_MODE_REL);
+				}
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+			} else {
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+
+				/* Power off the GPU immediately */
+				kbase_pm_do_poweroff(kbdev, false);
+			}
+		} else {
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+	}
+}
+
+/**
+ * get_desired_shader_bitmap - Get the desired shader bitmap, based on the
+ *                             current power policy
+ *
+ * @kbdev: The kbase device structure for the device
+ *
+ * Queries the current power policy to determine if shader cores will be
+ * required in the current state, and apply any HW workarounds.
+ *
+ * Return: bitmap of desired shader cores
+ */
+
+static u64 get_desired_shader_bitmap(struct kbase_device *kbdev)
+{
+	u64 desired_bitmap = 0u;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev))
+		desired_bitmap = kbase_pm_ca_get_core_mask(kbdev);
+
+	WARN(!desired_bitmap && kbdev->shader_needed_cnt,
+			"Shader cores are needed but policy '%s' did not make them needed",
+			kbdev->pm.backend.pm_current_policy->name);
+
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
+		/* Unless XAFFINITY is supported, enable core 0 if tiler
+		 * required, regardless of core availability
+		 */
+		if (kbdev->tiler_needed_cnt > 0)
+			desired_bitmap |= 1;
+	}
+
+	return desired_bitmap;
+}
+
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
+{
+	u64 desired_bitmap;
+	u64 desired_tiler_bitmap;
+	bool cores_are_available;
+	bool do_poweroff = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->pm.backend.pm_current_policy == NULL)
+		return;
+	if (kbdev->pm.backend.poweroff_wait_in_progress)
+		return;
+
+	if (kbdev->protected_mode_transition && !kbdev->shader_needed_cnt &&
+			!kbdev->tiler_needed_cnt) {
+		/* We are trying to change in/out of protected mode - force all
+		 * cores off so that the L2 powers down */
+		desired_bitmap = 0;
+		desired_tiler_bitmap = 0;
+	} else {
+		desired_bitmap = get_desired_shader_bitmap(kbdev);
+
+		if (kbdev->tiler_needed_cnt > 0)
+			desired_tiler_bitmap = 1;
+		else
+			desired_tiler_bitmap = 0;
+	}
+
+	if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
+							(u32)desired_bitmap);
+	/* Are any cores being powered on? */
+	if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
+	    ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap ||
+	    kbdev->pm.backend.ca_in_transition) {
+		/* Check if we are powering off any cores before updating shader
+		 * state */
+		if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
+			/* Start timer to power off cores */
+			kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+			kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
+
+			if (kbdev->pm.poweroff_shader_ticks &&
+					!kbdev->protected_mode_transition)
+				kbdev->pm.backend.shader_poweroff_pending_time =
+						kbdev->pm.poweroff_shader_ticks;
+			else
+				do_poweroff = true;
+		}
+
+		kbdev->pm.backend.desired_shader_state = desired_bitmap;
+		kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap;
+
+		/* If any cores are being powered on, transition immediately */
+		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	} else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
+		/* Start timer to power off cores */
+		kbdev->pm.backend.shader_poweroff_pending |=
+				(kbdev->pm.backend.desired_shader_state &
+							~desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
+		if (kbdev->pm.poweroff_shader_ticks &&
+				!kbdev->protected_mode_transition)
+			kbdev->pm.backend.shader_poweroff_pending_time =
+					kbdev->pm.poweroff_shader_ticks;
+		else
+			kbasep_pm_do_poweroff_cores(kbdev);
+	} else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
+			desired_tiler_bitmap != 0 &&
+			kbdev->pm.backend.poweroff_timer_needed) {
+		/* If power policy is keeping cores on despite there being no
+		 * active contexts then disable poweroff timer as it isn't
+		 * required.
+		 * Only reset poweroff_timer_needed if we're not in the middle
+		 * of the power off callback */
+		kbdev->pm.backend.poweroff_timer_needed = false;
+	}
+
+	/* Ensure timer does not power off wanted cores and make sure to power
+	 * off unwanted cores */
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
+		kbdev->pm.backend.shader_poweroff_pending &=
+				~(kbdev->pm.backend.desired_shader_state &
+								desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending &=
+				~(kbdev->pm.backend.desired_tiler_state &
+				desired_tiler_bitmap);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending &&
+				!kbdev->pm.backend.tiler_poweroff_pending)
+			kbdev->pm.backend.shader_poweroff_pending_time = 0;
+	}
+
+	/* Shader poweroff is deferred to the end of the function, to eliminate
+	 * issues caused by the core availability policy recursing into this
+	 * function */
+	if (do_poweroff)
+		kbasep_pm_do_poweroff_cores(kbdev);
+
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
+}
+
+void kbase_pm_update_cores_state(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_update_cores_state_nolock(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
+
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.pm_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
+
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_policy *new_policy)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	const struct kbase_pm_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	old_policy = kbdev->pm.backend.pm_current_policy;
+	kbdev->pm.backend.pm_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u,
+								old_policy->id);
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u,
+								new_policy->id);
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.pm_current_policy = new_policy;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here. */
+	kbase_pm_update_active(kbdev);
+	kbase_pm_update_cores_state(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Now the policy change is finished, we release our fake context active
+	 * reference */
+	kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
+
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, bool shader_required)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (shader_required) {
+		int cnt = ++kbdev->shader_needed_cnt;
+
+		if (cnt == 1)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt != 0);
+	}
+
+	if (tiler_required) {
+		int cnt = ++kbdev->tiler_needed_cnt;
+
+		if (cnt == 1)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, kbdev->shader_needed_cnt);
+		KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_TILER_NEEDED, NULL,
+				NULL, 0u, kbdev->tiler_needed_cnt);
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
+
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, bool shader_required)
+{
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (shader_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt > 0);
+
+		cnt = --kbdev->shader_needed_cnt;
+
+		if (0 == cnt) {
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		cnt = --kbdev->tiler_needed_cnt;
+
+		if (0 == cnt)
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+	}
+
+	if (change_gpu_state) {
+		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, kbdev->shader_needed_cnt);
+		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_TILER_NEEDED, NULL,
+				NULL, 0u, kbdev->tiler_needed_cnt);
+
+		kbase_pm_update_cores_state_nolock(kbdev);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
+
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+		bool tiler_required, bool shader_required)
+{
+	unsigned long flags;
+
+	kbase_pm_wait_for_poweroff_complete(kbdev);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_request_cores(kbdev, tiler_required, shader_required);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_check_transitions_sync(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
+
+static void kbase_pm_l2_caches_ref(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->l2_users_count++;
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
+
+	/* Check for the required L2 transitions.
+	 * Caller would block here for the L2 caches of all core groups to be
+	 * powered on, so need to inform the Hw to power up all the L2 caches.
+	 * Can't rely on the l2_users_count value being non-zero previously to
+	 * avoid checking for the transition, as the count could be non-zero
+	 * even if not all the instances of L2 cache are powered up since
+	 * currently the power status of L2 is not tracked separately for each
+	 * core group. Also if the GPU is reset while the L2 is on, L2 will be
+	 * off but the count will be non-zero.
+	 */
+	kbase_pm_check_transitions_nolock(kbdev);
+}
+
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Take the reference on l2_users_count and check core transitions.
+	 */
+	kbase_pm_l2_caches_ref(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	wait_event(kbdev->pm.backend.l2_powered_wait,
+					kbdev->pm.backend.l2_powered == 1);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
+
+void kbase_pm_request_l2_caches_nolock(struct kbase_device *kbdev)
+{
+	/* Take the reference on l2_users_count and check core transitions.
+	 */
+	kbase_pm_l2_caches_ref(kbdev);
+}
+
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->l2_users_count++;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
+
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
+
+	--kbdev->l2_users_count;
+
+	if (!kbdev->l2_users_count)
+		kbase_pm_check_transitions_nolock(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
new file mode 100644
index 0000000000000..2e86929c7e42f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
@@ -0,0 +1,247 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Power policy API definitions
+ */
+
+#ifndef _KBASE_PM_POLICY_H_
+#define _KBASE_PM_POLICY_H_
+
+/**
+ * kbase_pm_policy_init - Initialize power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Must be called before calling any other policy function
+ *
+ * Return: 0 if the power policy framework was successfully
+ *         initialized, -errno otherwise.
+ */
+int kbase_pm_policy_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_policy_term - Terminate power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_active - Update the active power state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores - Update the desired core state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_cores(struct kbase_device *kbdev);
+
+
+enum kbase_pm_cores_ready {
+	KBASE_CORES_NOT_READY = 0,
+	KBASE_NEW_AFFINITY = 1,
+	KBASE_CORES_READY = 2
+};
+
+
+/**
+ * kbase_pm_request_cores - Request the desired cores to be powered up.
+ * @kbdev:           Kbase device
+ * @tiler_required:  true if tiler is required
+ * @shader_required: true if shaders are required
+ *
+ * Called by the scheduler to request power to the desired cores.
+ *
+ * There is no guarantee that the HW will be powered up on return. Use
+ * kbase_pm_cores_requested()/kbase_pm_cores_ready() to verify that cores are
+ * now powered, or instead call kbase_pm_request_cores_sync().
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev, bool tiler_required,
+		bool shader_required);
+
+/**
+ * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
+ * @kbdev:           Kbase device
+ * @tiler_required:  true if tiler is required
+ * @shader_required: true if shaders are required
+ *
+ * When this function returns, the @shader_cores will be in the READY state.
+ *
+ * This is safe variant of kbase_pm_check_transitions_sync(): it handles the
+ * work of ensuring the requested cores will remain powered until a matching
+ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
+ * is made.
+ */
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+		bool tiler_required, bool shader_required);
+
+/**
+ * kbase_pm_release_cores - Request the desired cores to be powered down.
+ * @kbdev:           Kbase device
+ * @tiler_required:  true if tiler is required
+ * @shader_required: true if shaders are required
+ *
+ * Called by the scheduler to release its power reference on the desired cores.
+ */
+void kbase_pm_release_cores(struct kbase_device *kbdev, bool tiler_required,
+		bool shader_required);
+
+/**
+ * kbase_pm_cores_requested - Check that a power request has been locked into
+ *                            the HW.
+ * @kbdev:           Kbase device
+ * @tiler_required:  true if tiler is required
+ * @shader_required: true if shaders are required
+ *
+ * Called by the scheduler to check if a power on request has been locked into
+ * the HW.
+ *
+ * Note that there is no guarantee that the cores are actually ready, however
+ * when the request has been locked into the HW, then it is safe to submit work
+ * since the HW will wait for the transition to ready.
+ *
+ * A reference must first be taken prior to making this call.
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * Return: true if the request to the HW was successfully made else false if the
+ *         request is still pending.
+ */
+static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev,
+		bool tiler_required, bool shader_required)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if ((shader_required && !kbdev->shader_available_bitmap) ||
+			(tiler_required && !kbdev->tiler_available_bitmap))
+		return false;
+
+	return true;
+}
+
+/**
+ * kbase_pm_cores_ready -  Check that the required cores have been powered on by
+ *                         the HW.
+ * @kbdev:           Kbase device
+ * @tiler_required:  true if tiler is required
+ * @shader_required: true if shaders are required
+ *
+ * Called by the scheduler to check if cores are ready.
+ *
+ * Note that the caller should ensure that they have first requested cores
+ * before calling this function.
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * Return: true if the cores are ready.
+ */
+static inline bool kbase_pm_cores_ready(struct kbase_device *kbdev,
+		bool tiler_required, bool shader_required)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if ((shader_required && !kbdev->shader_ready_bitmap) ||
+			(tiler_required && !kbdev->tiler_available_bitmap))
+		return false;
+
+	return true;
+}
+
+/**
+ * kbase_pm_request_l2_caches - Request l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Request the use of l2 caches for all core groups, power up, wait and prevent
+ * the power manager from powering down the l2 caches.
+ *
+ * This tells the power management that the caches should be powered up, and
+ * they should remain powered, irrespective of the usage of shader cores. This
+ * does not return until the l2 caches are powered up.
+ *
+ * The caller must call kbase_pm_release_l2_caches() when they are finished
+ * to allow normal power management of the l2 caches to resume.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches_nolock - Request l2 caches, nolock version
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Request the use of l2 caches for all core groups and power up without
+ * waiting for power manager to actually power up the cores. This is done
+ * because the call to this function is done from within the atomic context
+ * and the actual l2 caches being powered up is checked at a later stage.
+ * The reference taken on l2 caches is removed when the protected mode atom
+ * is released so there is no need to make a call to a matching
+ * release_l2_caches().
+ *
+ * This function is used specifically for the case when l2 caches are
+ * to be powered up as part of the sequence for entering protected mode.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_request_l2_caches_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Increment the count of l2 users but do not attempt to power on the l2
+ *
+ * It is the callers responsibility to ensure that the l2 is already powered up
+ * and to eventually call kbase_pm_release_l2_caches()
+ */
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_l2_caches - Release l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Release the use of l2 caches for all core groups and allow the power manager
+ * to power them down when necessary.
+ *
+ * This tells the power management that the caches can be powered down if
+ * necessary, with respect to the usage of shader cores.
+ *
+ * The caller must have called kbase_pm_request_l2_caches() prior to a call
+ * to this.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
new file mode 100644
index 0000000000000..5e1b761cf43c1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -0,0 +1,104 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts)
+{
+	u32 hi1, hi2;
+
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI));
+		*cycle_counter = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO));
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI));
+		*cycle_counter |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI));
+		*system_time = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TIMESTAMP_LO));
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI));
+		*system_time |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Record the CPU's idea of current time */
+	getrawmonotonic(ts);
+
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+}
+
+/**
+ * kbase_wait_write_flush -  Wait for GPU write flush
+ * @kbdev: Kbase device
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * Only in use for BASE_HW_ISSUE_6367
+ *
+ * Note : If GPU resets occur then the counters are reset to zero, the delay may
+ * not be as expected.
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_device *kbdev)
+{
+	u32 base_count = 0;
+
+	/*
+	 * The caller must be holding onto the kctx or the call is from
+	 * userspace.
+	 */
+	kbase_pm_context_active(kbdev);
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+
+	while (true) {
+		u32 new_count;
+
+		new_count = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO));
+		/* First time around, just store the count. */
+		if (base_count == 0) {
+			base_count = new_count;
+			continue;
+		}
+
+		/* No need to handle wrapping, unsigned maths works for this. */
+		if ((new_count - base_count) > 1000)
+			break;
+	}
+
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+	kbase_pm_context_idle(kbdev);
+}
+#endif				/* CONFIG_MALI_NO_MALI */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
new file mode 100644
index 0000000000000..ece70092b48ad
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
@@ -0,0 +1,57 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kbdev:	Kbase device
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifdef CONFIG_MALI_NO_MALI
+static inline void kbase_wait_write_flush(struct kbase_device *kbdev)
+{
+}
+#else
+void kbase_wait_write_flush(struct kbase_device *kbdev);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/drivers/gpu/arm/midgard/build.bp b/drivers/gpu/arm/midgard/build.bp
new file mode 100644
index 0000000000000..ada69207c3d30
--- /dev/null
+++ b/drivers/gpu/arm/midgard/build.bp
@@ -0,0 +1,108 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2017-2018 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+/* Kernel-side tests may include mali_kbase's headers. Therefore any config
+ * options which affect the sizes of any structs (e.g. adding extra members)
+ * must be included in these defaults, so that the structs are consistent in
+ * both mali_kbase and the test modules. */
+bob_defaults {
+    name: "mali_kbase_shared_config_defaults",
+    no_mali: {
+        kbuild_options: ["CONFIG_MALI_NO_MALI=y"],
+    },
+    mali_corestack: {
+        kbuild_options: ["CONFIG_MALI_CORESTACK=y"],
+    },
+    mali_devfreq: {
+        kbuild_options: ["CONFIG_MALI_DEVFREQ=y"],
+    },
+    mali_midgard_dvfs: {
+        kbuild_options: ["CONFIG_MALI_MIDGARD_DVFS=y"],
+    },
+    mali_debug: {
+        kbuild_options: ["CONFIG_MALI_DEBUG=y"],
+    },
+    mali_fpga_bus_logger: {
+        kbuild_options: ["CONFIG_MALI_FPGA_BUS_LOGGER=y"],
+    },
+    cinstr_job_dump: {
+        kbuild_options: ["CONFIG_MALI_JOB_DUMP=y"],
+    },
+    mali_gator_support: {
+        kbuild_options: ["CONFIG_MALI_GATOR_SUPPORT=y"],
+    },
+    mali_system_trace: {
+        kbuild_options: ["CONFIG_MALI_SYSTEM_TRACE=y"],
+    },
+    mali_pwrsoft_765: {
+        kbuild_options: ["CONFIG_MALI_PWRSOFT_765=y"],
+    },
+    kbuild_options: [
+        "MALI_UNIT_TEST={{.unit_test_code}}",
+        "MALI_CUSTOMER_RELEASE={{.release}}",
+        "MALI_USE_CSF={{.gpu_has_csf}}",
+        "MALI_KERNEL_TEST_API={{.debug}}",
+    ],
+    defaults: ["kernel_defaults"],
+}
+
+bob_kernel_module {
+    name: "mali_kbase",
+    srcs: [
+        "*.c",
+        "*.h",
+        "Kbuild",
+        "backend/gpu/*.c",
+        "backend/gpu/*.h",
+        "backend/gpu/Kbuild",
+        "ipa/*.c",
+        "ipa/*.h",
+        "ipa/Kbuild",
+        "platform/*.h",
+        "platform/*/*.c",
+        "platform/*/*.h",
+        "platform/*/Kbuild",
+        "thirdparty/*.c",
+    ],
+    kbuild_options: [
+        "CONFIG_MALI_KUTF=n",
+        "CONFIG_MALI_MIDGARD=m",
+        "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}",
+        "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}",
+        "MALI_MOCK_TEST={{.mali_mock_test}}",
+    ],
+    mali_error_inject: {
+        kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"],
+    },
+    mali_error_inject_random: {
+        kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"],
+    },
+    cinstr_secondary_hwc: {
+        kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY=y"],
+    },
+    mali_2mb_alloc: {
+        kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"],
+    },
+    mali_mock_test: {
+        srcs: ["tests/internal/src/mock/mali_kbase_pm_driver_mock.c"],
+    },
+    gpu_has_csf: {
+        srcs: [
+            "csf/*.c",
+            "csf/*.h",
+            "csf/Kbuild",
+        ],
+    },
+    defaults: ["mali_kbase_shared_config_defaults"],
+}
+
+optional_subdirs = ["tests"]
diff --git a/drivers/gpu/arm/midgard/docs/Doxyfile b/drivers/gpu/arm/midgard/docs/Doxyfile
new file mode 100644
index 0000000000000..6498dcbc1840d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/docs/Doxyfile
@@ -0,0 +1,132 @@
+#
+# (C) COPYRIGHT 2011-2013, 2015, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  += ../../kernel/drivers/gpu/arm/midgard/
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+EXCLUDE                += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile
+
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           +=
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           += ../../kernel/drivers/gpu/arm/midgard/docs
+
diff --git a/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot b/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
new file mode 100644
index 0000000000000..a15b558114828
--- /dev/null
+++ b/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
@@ -0,0 +1,117 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR;
+	size="12,8";
+	compound=true;
+
+	node [ shape = box ];
+
+	subgraph cluster_policy_queues {
+		low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ];
+		queues_middle_sep [ label="" shape=plaintext width=0 height=0 ];
+
+		rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... | <qr>ctx_hi}" ];
+
+		label = "Policy's Queue(s)";
+	}
+
+	call_enqueue [ shape=plaintext label="enqueue_ctx()" ];
+
+	{
+		rank=same;
+		ordering=out;
+		call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ];
+		call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ];
+
+		call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ];
+	}
+
+	subgraph cluster_runpool {
+
+		as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ];
+		as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ];
+		as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ];
+		as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ];
+
+		label = "Policy's Run Pool";
+	}
+
+	{
+		rank=same;
+		call_jdequeue [ shape=plaintext label="dequeue_job()" ];
+		sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ];
+	}
+
+	{
+		rank=same;
+		ordering=out;
+		sstop [ shape=ellipse label="SS-Timer expires" ]
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		irq [ label="IRQ" shape=ellipse ];
+
+		job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ];
+	}
+
+	hstop [ shape=ellipse label="HS-Timer expires" ]
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ];
+
+	low_queue:qr -> call_dequeue:w;
+	rt_queue:qr -> call_dequeue:w;
+
+	call_dequeue -> as1 [lhead=cluster_runpool];
+
+	as1->call_jdequeue         [ltail=cluster_runpool];
+	call_jdequeue->jobslots:0;
+	call_jdequeue->sstop_dotfixup [ arrowhead=none];
+	sstop_dotfixup->sstop      [label="Spawn SS-Timer"];
+	sstop->jobslots            [label="SoftStop"];
+	sstop->hstop               [label="Spawn HS-Timer"];
+	hstop->jobslots:ne            [label="HardStop"];
+
+
+	as3->call_ctxfinish:ne [ ltail=cluster_runpool ];
+	call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ];
+
+	call_ctxfinish->call_ctxdone [constraint=false];
+
+	call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false];
+
+
+	{
+	jobslots->irq   [constraint=false];
+
+	irq->job_finish [constraint=false];
+	}
+
+	irq->as2  [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ];
+
+}
diff --git a/drivers/gpu/arm/midgard/docs/policy_overview.dot b/drivers/gpu/arm/midgard/docs/policy_overview.dot
new file mode 100644
index 0000000000000..6b87335931919
--- /dev/null
+++ b/drivers/gpu/arm/midgard/docs/policy_overview.dot
@@ -0,0 +1,68 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+digraph policy_objects_diagram {
+	rankdir=LR
+	size="6,6"
+	compound=true;
+
+	node [ shape = box ];
+
+	call_enqueue [ shape=plaintext label="enqueue ctx" ];
+
+
+	policy_queue [ label="Policy's Queue" ];
+
+	{
+		rank=same;
+		runpool [ label="Policy's Run Pool" ];
+
+		ctx_finish [ label="ctx finished" ];
+	}
+
+	{
+		rank=same;
+		jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+		job_finish [ label="Job finished" ];
+	}
+
+
+
+	/*
+	 * Edges
+	 */
+
+	call_enqueue -> policy_queue;
+
+	policy_queue->runpool [label="dequeue ctx" weight=0.1];
+	runpool->policy_queue [label="requeue ctx" weight=0.1];
+
+	runpool->ctx_finish [ style=dotted ];
+
+	runpool->jobslots  [label="dequeue job" weight=0.1];
+	jobslots->runpool  [label="requeue job" weight=0.1];
+
+	jobslots->job_finish [ style=dotted ];
+}
diff --git a/drivers/gpu/arm/midgard/ipa/Kbuild b/drivers/gpu/arm/midgard/ipa/Kbuild
new file mode 100644
index 0000000000000..297d7f90ae7fc
--- /dev/null
+++ b/drivers/gpu/arm/midgard/ipa/Kbuild
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+mali_kbase-y += \
+	ipa/mali_kbase_ipa_simple.o \
+	ipa/mali_kbase_ipa.o
+
+mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o
+
+ifneq ($(wildcard $(src)/ipa/mali_kbase_ipa_vinstr_common.c),)
+  mali_kbase-y += \
+	ipa/mali_kbase_ipa_vinstr_g7x.o \
+	ipa/mali_kbase_ipa_vinstr_common.o
+endif
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
new file mode 100644
index 0000000000000..15566f63c6e87
--- /dev/null
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
@@ -0,0 +1,649 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#include <linux/thermal.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/of.h>
+#include "mali_kbase.h"
+#include "mali_kbase_ipa.h"
+#include "mali_kbase_ipa_debugfs.h"
+#include "mali_kbase_ipa_simple.h"
+#include "backend/gpu/mali_kbase_pm_internal.h"
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#define dev_pm_opp_find_freq_exact opp_find_freq_exact
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp opp
+#endif
+
+#define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model"
+#define KBASE_IPA_G71_MODEL_NAME      "mali-g71-power-model"
+#define KBASE_IPA_G72_MODEL_NAME      "mali-g72-power-model"
+#define KBASE_IPA_TNOX_MODEL_NAME     "mali-tnox-power-model"
+#define KBASE_IPA_TGOX_R1_MODEL_NAME  "mali-tgox_r1-power-model"
+
+static struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = {
+	&kbase_simple_ipa_model_ops,
+	&kbase_g71_ipa_model_ops,
+	&kbase_g72_ipa_model_ops,
+	&kbase_tnox_ipa_model_ops
+};
+
+int kbase_ipa_model_recalculate(struct kbase_ipa_model *model)
+{
+	int err = 0;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	if (model->ops->recalculate) {
+		err = model->ops->recalculate(model);
+		if (err) {
+			dev_err(model->kbdev->dev,
+				"recalculation of power model %s returned error %d\n",
+				model->ops->name, err);
+		}
+	}
+
+	return err;
+}
+
+static struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
+							    const char *name)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) {
+		struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i];
+
+		if (!strcmp(ops->name, name))
+			return ops;
+	}
+
+	dev_err(kbdev->dev, "power model \'%s\' not found\n", name);
+
+	return NULL;
+}
+
+const char *kbase_ipa_model_name_from_id(u32 gpu_id)
+{
+	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+			GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(prod_id)) {
+		switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
+		case GPU_ID2_PRODUCT_TMIX:
+			return KBASE_IPA_G71_MODEL_NAME;
+		case GPU_ID2_PRODUCT_THEX:
+			return KBASE_IPA_G72_MODEL_NAME;
+		case GPU_ID2_PRODUCT_TNOX:
+			return KBASE_IPA_TNOX_MODEL_NAME;
+		case GPU_ID2_PRODUCT_TGOX:
+			if ((gpu_id & GPU_ID2_VERSION_MAJOR) ==
+					(0 << GPU_ID2_VERSION_MAJOR_SHIFT))
+				/* TGOX r0 shares a power model with TNOX */
+				return KBASE_IPA_TNOX_MODEL_NAME;
+			else
+				return KBASE_IPA_TGOX_R1_MODEL_NAME;
+		default:
+			return KBASE_IPA_FALLBACK_MODEL_NAME;
+		}
+	}
+
+	return KBASE_IPA_FALLBACK_MODEL_NAME;
+}
+
+static struct device_node *get_model_dt_node(struct kbase_ipa_model *model)
+{
+	struct device_node *model_dt_node;
+	char compat_string[64];
+
+	snprintf(compat_string, sizeof(compat_string), "arm,%s",
+		 model->ops->name);
+
+	/* of_find_compatible_node() will call of_node_put() on the root node,
+	 * so take a reference on it first.
+	 */
+	of_node_get(model->kbdev->dev->of_node);
+	model_dt_node = of_find_compatible_node(model->kbdev->dev->of_node,
+						NULL, compat_string);
+	if (!model_dt_node && !model->missing_dt_node_warning) {
+		dev_warn(model->kbdev->dev,
+			 "Couldn't find power_model DT node matching \'%s\'\n",
+			 compat_string);
+		model->missing_dt_node_warning = true;
+	}
+
+	return model_dt_node;
+}
+
+int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
+				  const char *name, s32 *addr,
+				  size_t num_elems, bool dt_required)
+{
+	int err, i;
+	struct device_node *model_dt_node = get_model_dt_node(model);
+	char *origin;
+
+	err = of_property_read_u32_array(model_dt_node, name, addr, num_elems);
+	/* We're done with model_dt_node now, so drop the reference taken in
+	 * get_model_dt_node()/of_find_compatible_node().
+	 */
+	of_node_put(model_dt_node);
+
+	if (err && dt_required) {
+		memset(addr, 0, sizeof(s32) * num_elems);
+		dev_warn(model->kbdev->dev,
+			 "Error %d, no DT entry: %s.%s = %zu*[0]\n",
+			 err, model->ops->name, name, num_elems);
+		origin = "zero";
+	} else if (err && !dt_required) {
+		origin = "default";
+	} else /* !err */ {
+		origin = "DT";
+	}
+
+	/* Create a unique debugfs entry for each element */
+	for (i = 0; i < num_elems; ++i) {
+		char elem_name[32];
+
+		if (num_elems == 1)
+			snprintf(elem_name, sizeof(elem_name), "%s", name);
+		else
+			snprintf(elem_name, sizeof(elem_name), "%s.%d",
+				name, i);
+
+		dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n",
+			model->ops->name, elem_name, addr[i], origin);
+
+		err = kbase_ipa_model_param_add(model, elem_name,
+						&addr[i], sizeof(s32),
+						PARAM_TYPE_S32);
+		if (err)
+			goto exit;
+	}
+exit:
+	return err;
+}
+
+int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model,
+				     const char *name, char *addr,
+				     size_t size, bool dt_required)
+{
+	int err;
+	struct device_node *model_dt_node = get_model_dt_node(model);
+	const char *string_prop_value;
+	char *origin;
+
+	err = of_property_read_string(model_dt_node, name,
+				      &string_prop_value);
+
+	/* We're done with model_dt_node now, so drop the reference taken in
+	 * get_model_dt_node()/of_find_compatible_node().
+	 */
+	of_node_put(model_dt_node);
+
+	if (err && dt_required) {
+		strncpy(addr, "", size - 1);
+		dev_warn(model->kbdev->dev,
+			 "Error %d, no DT entry: %s.%s = \'%s\'\n",
+			 err, model->ops->name, name, addr);
+		err = 0;
+		origin = "zero";
+	} else if (err && !dt_required) {
+		origin = "default";
+	} else /* !err */ {
+		strncpy(addr, string_prop_value, size - 1);
+		origin = "DT";
+	}
+
+	addr[size - 1] = '\0';
+
+	dev_dbg(model->kbdev->dev, "%s.%s = \'%s\' (%s)\n",
+		model->ops->name, name, string_prop_value, origin);
+
+	err = kbase_ipa_model_param_add(model, name, addr, size,
+					PARAM_TYPE_STRING);
+	return err;
+}
+
+void kbase_ipa_term_model(struct kbase_ipa_model *model)
+{
+	if (!model)
+		return;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	if (model->ops->term)
+		model->ops->term(model);
+
+	kbase_ipa_model_param_free_all(model);
+
+	kfree(model);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_term_model);
+
+struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
+					     struct kbase_ipa_model_ops *ops)
+{
+	struct kbase_ipa_model *model;
+	int err;
+
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	if (!ops || !ops->name)
+		return NULL;
+
+	model = kzalloc(sizeof(struct kbase_ipa_model), GFP_KERNEL);
+	if (!model)
+		return NULL;
+
+	model->kbdev = kbdev;
+	model->ops = ops;
+	INIT_LIST_HEAD(&model->params);
+
+	err = model->ops->init(model);
+	if (err) {
+		dev_err(kbdev->dev,
+			"init of power model \'%s\' returned error %d\n",
+			ops->name, err);
+		kfree(model);
+		return NULL;
+	}
+
+	err = kbase_ipa_model_recalculate(model);
+	if (err) {
+		kbase_ipa_term_model(model);
+		return NULL;
+	}
+
+	return model;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_init_model);
+
+static void kbase_ipa_term_locked(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	/* Clean up the models */
+	if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model)
+		kbase_ipa_term_model(kbdev->ipa.configured_model);
+	kbase_ipa_term_model(kbdev->ipa.fallback_model);
+
+	kbdev->ipa.configured_model = NULL;
+	kbdev->ipa.fallback_model = NULL;
+}
+
+int kbase_ipa_init(struct kbase_device *kbdev)
+{
+
+	const char *model_name;
+	struct kbase_ipa_model_ops *ops;
+	struct kbase_ipa_model *default_model = NULL;
+	int err;
+
+	mutex_init(&kbdev->ipa.lock);
+	/*
+	 * Lock during init to avoid warnings from lockdep_assert_held (there
+	 * shouldn't be any concurrent access yet).
+	 */
+	mutex_lock(&kbdev->ipa.lock);
+
+	/* The simple IPA model must *always* be present.*/
+	ops = kbase_ipa_model_ops_find(kbdev, KBASE_IPA_FALLBACK_MODEL_NAME);
+
+	default_model = kbase_ipa_init_model(kbdev, ops);
+	if (!default_model) {
+		err = -EINVAL;
+		goto end;
+	}
+
+	kbdev->ipa.fallback_model = default_model;
+	err = of_property_read_string(kbdev->dev->of_node,
+				      "ipa-model",
+				      &model_name);
+	if (err) {
+		/* Attempt to load a match from GPU-ID */
+		u32 gpu_id;
+
+		gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		model_name = kbase_ipa_model_name_from_id(gpu_id);
+		dev_dbg(kbdev->dev,
+			"Inferring model from GPU ID 0x%x: \'%s\'\n",
+			gpu_id, model_name);
+		err = 0;
+	} else {
+		dev_dbg(kbdev->dev,
+			"Using ipa-model parameter from DT: \'%s\'\n",
+			model_name);
+	}
+
+	if (strcmp(KBASE_IPA_FALLBACK_MODEL_NAME, model_name) != 0) {
+		ops = kbase_ipa_model_ops_find(kbdev, model_name);
+		kbdev->ipa.configured_model = kbase_ipa_init_model(kbdev, ops);
+		if (!kbdev->ipa.configured_model) {
+			dev_warn(kbdev->dev,
+				"Failed to initialize ipa-model: \'%s\'\n"
+				"Falling back on default model\n",
+				model_name);
+			kbdev->ipa.configured_model = default_model;
+		}
+	} else {
+		kbdev->ipa.configured_model = default_model;
+	}
+
+end:
+	if (err)
+		kbase_ipa_term_locked(kbdev);
+	else
+		dev_info(kbdev->dev,
+			 "Using configured power model %s, and fallback %s\n",
+			 kbdev->ipa.configured_model->ops->name,
+			 kbdev->ipa.fallback_model->ops->name);
+
+	mutex_unlock(&kbdev->ipa.lock);
+	return err;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_init);
+
+void kbase_ipa_term(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->ipa.lock);
+	kbase_ipa_term_locked(kbdev);
+	mutex_unlock(&kbdev->ipa.lock);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_term);
+
+/**
+ * kbase_scale_dynamic_power() - Scale a dynamic power coefficient to an OPP
+ * @c:		Dynamic model coefficient, in pW/(Hz V^2). Should be in range
+ *		0 < c < 2^26 to prevent overflow.
+ * @freq:	Frequency, in Hz. Range: 2^23 < freq < 2^30 (~8MHz to ~1GHz)
+ * @voltage:	Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V)
+ *
+ * Keep a record of the approximate range of each value at every stage of the
+ * calculation, to ensure we don't overflow. This makes heavy use of the
+ * approximations 1000 = 2^10 and 1000000 = 2^20, but does the actual
+ * calculations in decimal for increased accuracy.
+ *
+ * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W)
+ */
+static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq,
+				     const u32 voltage)
+{
+	/* Range: 2^8 < v2 < 2^16 m(V^2) */
+	const u32 v2 = (voltage * voltage) / 1000;
+
+	/* Range: 2^3 < f_MHz < 2^10 MHz */
+	const u32 f_MHz = freq / 1000000;
+
+	/* Range: 2^11 < v2f_big < 2^26 kHz V^2 */
+	const u32 v2f_big = v2 * f_MHz;
+
+	/* Range: 2^1 < v2f < 2^16 MHz V^2 */
+	const u32 v2f = v2f_big / 1000;
+
+	/* Range (working backwards from next line): 0 < v2fc < 2^23 uW.
+	 * Must be < 2^42 to avoid overflowing the return value. */
+	const u64 v2fc = (u64) c * (u64) v2f;
+
+	/* Range: 0 < v2fc / 1000 < 2^13 mW */
+	return div_u64(v2fc, 1000);
+}
+
+/**
+ * kbase_scale_static_power() - Scale a static power coefficient to an OPP
+ * @c:		Static model coefficient, in uW/V^3. Should be in range
+ *		0 < c < 2^32 to prevent overflow.
+ * @voltage:	Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V)
+ *
+ * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W)
+ */
+u32 kbase_scale_static_power(const u32 c, const u32 voltage)
+{
+	/* Range: 2^8 < v2 < 2^16 m(V^2) */
+	const u32 v2 = (voltage * voltage) / 1000;
+
+	/* Range: 2^17 < v3_big < 2^29 m(V^2) mV */
+	const u32 v3_big = v2 * voltage;
+
+	/* Range: 2^7 < v3 < 2^19 m(V^3) */
+	const u32 v3 = v3_big / 1000;
+
+	/*
+	 * Range (working backwards from next line): 0 < v3c_big < 2^33 nW.
+	 * The result should be < 2^52 to avoid overflowing the return value.
+	 */
+	const u64 v3c_big = (u64) c * (u64) v3;
+
+	/* Range: 0 < v3c_big / 1000000 < 2^13 mW */
+	return div_u64(v3c_big, 1000000);
+}
+
+void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Record the event of GPU entering protected mode. */
+	kbdev->ipa_protection_mode_switched = true;
+}
+
+static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev)
+{
+	struct kbase_ipa_model *model;
+	unsigned long flags;
+
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (kbdev->ipa_protection_mode_switched)
+		model = kbdev->ipa.fallback_model;
+	else
+		model = kbdev->ipa.configured_model;
+
+	/*
+	 * Having taken cognizance of the fact that whether GPU earlier
+	 * protected mode or not, the event can be now reset (if GPU is not
+	 * currently in protected mode) so that configured model is used
+	 * for the next sample.
+	 */
+	if (!kbdev->protected_mode)
+		kbdev->ipa_protection_mode_switched = false;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return model;
+}
+
+static u32 get_static_power_locked(struct kbase_device *kbdev,
+				   struct kbase_ipa_model *model,
+				   unsigned long voltage)
+{
+	u32 power = 0;
+	int err;
+	u32 power_coeff;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	if (!model->ops->get_static_coeff)
+		model = kbdev->ipa.fallback_model;
+
+	if (model->ops->get_static_coeff) {
+		err = model->ops->get_static_coeff(model, &power_coeff);
+		if (!err)
+			power = kbase_scale_static_power(power_coeff,
+							 (u32) voltage);
+	}
+
+	return power;
+}
+
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static unsigned long kbase_get_static_power(struct devfreq *df,
+					    unsigned long voltage)
+#else
+static unsigned long kbase_get_static_power(unsigned long voltage)
+#endif
+{
+	struct kbase_ipa_model *model;
+	u32 power = 0;
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+#else
+	struct kbase_device *kbdev = kbase_find_device(-1);
+#endif
+
+	mutex_lock(&kbdev->ipa.lock);
+
+	model = get_current_model(kbdev);
+	power = get_static_power_locked(kbdev, model, voltage);
+
+	mutex_unlock(&kbdev->ipa.lock);
+
+#if !(defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
+	kbase_release_device(kbdev);
+#endif
+
+	return power;
+}
+
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+static unsigned long kbase_get_dynamic_power(struct devfreq *df,
+					     unsigned long freq,
+					     unsigned long voltage)
+#else
+static unsigned long kbase_get_dynamic_power(unsigned long freq,
+					     unsigned long voltage)
+#endif
+{
+	struct kbase_ipa_model *model;
+	u32 power_coeff = 0, power = 0;
+	int err = 0;
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+#else
+	struct kbase_device *kbdev = kbase_find_device(-1);
+#endif
+
+	mutex_lock(&kbdev->ipa.lock);
+
+	model = kbdev->ipa.fallback_model;
+
+	err = model->ops->get_dynamic_coeff(model, &power_coeff);
+
+	if (!err)
+		power = kbase_scale_dynamic_power(power_coeff, freq, voltage);
+	else
+		dev_err_ratelimited(kbdev->dev,
+				    "Model %s returned error code %d\n",
+				    model->ops->name, err);
+
+	mutex_unlock(&kbdev->ipa.lock);
+
+#if !(defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
+	kbase_release_device(kbdev);
+#endif
+
+	return power;
+}
+
+int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
+				unsigned long freq,
+				unsigned long voltage)
+{
+	struct kbase_ipa_model *model;
+	u32 power_coeff = 0;
+	int err = 0;
+	struct kbasep_pm_metrics diff;
+	u64 total_time;
+
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	kbase_pm_get_dvfs_metrics(kbdev, &kbdev->ipa.last_metrics, &diff);
+
+	model = get_current_model(kbdev);
+
+	err = model->ops->get_dynamic_coeff(model, &power_coeff);
+
+	/* If the counter model returns an error (e.g. switching back to
+	 * protected mode and failing to read counters, or a counter sample
+	 * with too few cycles), revert to the fallback model.
+	 */
+	if (err && model != kbdev->ipa.fallback_model) {
+		model = kbdev->ipa.fallback_model;
+		err = model->ops->get_dynamic_coeff(model, &power_coeff);
+	}
+
+	if (err)
+		return err;
+
+	*power = kbase_scale_dynamic_power(power_coeff, freq, voltage);
+
+	/* time_busy / total_time cannot be >1, so assigning the 64-bit
+	 * result of div_u64 to *power cannot overflow.
+	 */
+	total_time = diff.time_busy + (u64) diff.time_idle;
+	*power = div_u64(*power * (u64) diff.time_busy,
+			 max(total_time, 1ull));
+
+	*power += get_static_power_locked(kbdev, model, voltage);
+
+	return err;
+}
+KBASE_EXPORT_TEST_API(kbase_get_real_power_locked);
+
+int kbase_get_real_power(struct devfreq *df, u32 *power,
+				unsigned long freq,
+				unsigned long voltage)
+{
+	int ret;
+	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
+
+	mutex_lock(&kbdev->ipa.lock);
+	ret = kbase_get_real_power_locked(kbdev, power, freq, voltage);
+	mutex_unlock(&kbdev->ipa.lock);
+
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_get_real_power);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+struct devfreq_cooling_ops kbase_ipa_power_model_ops = {
+#else
+struct devfreq_cooling_power kbase_ipa_power_model_ops = {
+#endif
+	.get_static_power = &kbase_get_static_power,
+	.get_dynamic_power = &kbase_get_dynamic_power,
+#if defined(CONFIG_MALI_PWRSOFT_765) || \
+	LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+	.get_real_power = &kbase_get_real_power,
+#endif
+};
+KBASE_EXPORT_TEST_API(kbase_ipa_power_model_ops);
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
new file mode 100644
index 0000000000000..4656ded0f0ee3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
@@ -0,0 +1,229 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_IPA_H_
+#define _KBASE_IPA_H_
+
+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)
+
+struct devfreq;
+
+/**
+ * struct kbase_ipa_model - Object describing a particular IPA model.
+ * @kbdev:                    pointer to kbase device
+ * @model_data:               opaque pointer to model specific data, accessed
+ *                            only by model specific methods.
+ * @ops:                      pointer to object containing model specific methods.
+ * @params:                   head of the list of debugfs params added for model
+ * @missing_dt_node_warning:  flag to limit the matching power model DT not found
+ *                            warning to once.
+ */
+struct kbase_ipa_model {
+	struct kbase_device *kbdev;
+	void *model_data;
+	struct kbase_ipa_model_ops *ops;
+	struct list_head params;
+	bool missing_dt_node_warning;
+};
+
+/**
+ * kbase_ipa_model_add_param_s32 - Add an integer model parameter
+ * @model:	pointer to IPA model
+ * @name:	name of corresponding debugfs entry
+ * @addr:	address where the value is stored
+ * @num_elems:	number of elements (1 if not an array)
+ * @dt_required: if false, a corresponding devicetree entry is not required,
+ *		 and the current value will be used. If true, a warning is
+ *		 output and the data is zeroed
+ *
+ * Return: 0 on success, or an error code
+ */
+int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
+				  const char *name, s32 *addr,
+				  size_t num_elems, bool dt_required);
+
+/**
+ * kbase_ipa_model_add_param_string - Add a string model parameter
+ * @model:	pointer to IPA model
+ * @name:	name of corresponding debugfs entry
+ * @addr:	address where the value is stored
+ * @size:	size, in bytes, of the value storage (so the maximum string
+ *		length is size - 1)
+ * @dt_required: if false, a corresponding devicetree entry is not required,
+ *		 and the current value will be used. If true, a warning is
+ *		 output and the data is zeroed
+ *
+ * Return: 0 on success, or an error code
+ */
+int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model,
+				     const char *name, char *addr,
+				     size_t size, bool dt_required);
+
+struct kbase_ipa_model_ops {
+	char *name;
+	/* The init, recalculate and term ops on the default model are always
+	 * called.  However, all the other models are only invoked if the model
+	 * is selected in the device tree. Otherwise they are never
+	 * initialized. Additional resources can be acquired by models in
+	 * init(), however they must be terminated in the term().
+	 */
+	int (*init)(struct kbase_ipa_model *model);
+	/* Called immediately after init(), or when a parameter is changed, so
+	 * that any coefficients derived from model parameters can be
+	 * recalculated. */
+	int (*recalculate)(struct kbase_ipa_model *model);
+	void (*term)(struct kbase_ipa_model *model);
+	/*
+	 * get_dynamic_coeff() - calculate dynamic power coefficient
+	 * @model:		pointer to model
+	 * @coeffp:		pointer to return value location
+	 *
+	 * Calculate a dynamic power coefficient, with units pW/(Hz V^2), which
+	 * is then scaled by the IPA framework according to the current OPP's
+	 * frequency and voltage.
+	 *
+	 * Return: 0 on success, or an error code.
+	 */
+	int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp);
+	/*
+	 * get_static_coeff() - calculate static power coefficient
+	 * @model:		pointer to model
+	 * @coeffp:		pointer to return value location
+	 *
+	 * Calculate a static power coefficient, with units uW/(V^3), which is
+	 * scaled by the IPA framework according to the current OPP's voltage.
+	 *
+	 * Return: 0 on success, or an error code.
+	 */
+	int (*get_static_coeff)(struct kbase_ipa_model *model, u32 *coeffp);
+};
+
+/**
+ * kbase_ipa_init - Initialize the IPA feature
+ * @kbdev:      pointer to kbase device
+ *
+ * simple IPA power model is initialized as a fallback model and if that
+ * initialization fails then IPA is not used.
+ * The device tree is read for the name of ipa model to be used, by using the
+ * property string "ipa-model". If that ipa model is supported then it is
+ * initialized but if the initialization fails then simple power model is used.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_ipa_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_term - Terminate the IPA feature
+ * @kbdev:      pointer to kbase device
+ *
+ * Both simple IPA power model and model retrieved from device tree are
+ * terminated.
+ */
+void kbase_ipa_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_model_recalculate - Recalculate the model coefficients
+ * @model:      pointer to the IPA model object, already initialized
+ *
+ * It shall be called immediately after the model has been initialized
+ * or when the model parameter has changed, so that any coefficients
+ * derived from parameters can be recalculated.
+ * Its a wrapper for the module specific recalculate() method.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_ipa_model_recalculate(struct kbase_ipa_model *model);
+
+/**
+ * kbase_ipa_init_model - Initilaize the particular IPA model
+ * @kbdev:      pointer to kbase device
+ * @ops:        pointer to object containing model specific methods.
+ *
+ * Initialize the model corresponding to the @ops pointer passed.
+ * The init() method specified in @ops would be called.
+ *
+ * Return: pointer to kbase_ipa_model on success, NULL on error
+ */
+struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
+					     struct kbase_ipa_model_ops *ops);
+/**
+ * kbase_ipa_term_model - Terminate the particular IPA model
+ * @model:      pointer to the IPA model object, already initialized
+ *
+ * Terminate the model, using the term() method.
+ * Module specific parameters would be freed.
+ */
+void kbase_ipa_term_model(struct kbase_ipa_model *model);
+
+/**
+ * kbase_ipa_protection_mode_switch_event - Inform IPA of the GPU's entry into
+ *                                          protected mode
+ * @kbdev:      pointer to kbase device
+ *
+ * Makes IPA aware of the GPU switching to protected mode.
+ */
+void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev);
+
+extern struct kbase_ipa_model_ops kbase_g71_ipa_model_ops;
+extern struct kbase_ipa_model_ops kbase_g72_ipa_model_ops;
+extern struct kbase_ipa_model_ops kbase_tnox_ipa_model_ops;
+extern struct kbase_ipa_model_ops kbase_tgox_r1_ipa_model_ops;
+
+/**
+ * kbase_get_real_power() - get the real power consumption of the GPU
+ * @df: dynamic voltage and frequency scaling information for the GPU.
+ * @power: where to store the power consumption, in mW.
+ * @freq: a frequency, in HZ.
+ * @voltage: a voltage, in mV.
+ *
+ * The returned value incorporates both static and dynamic power consumption.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_get_real_power(struct devfreq *df, u32 *power,
+				unsigned long freq,
+				unsigned long voltage);
+
+#if MALI_UNIT_TEST
+/* Called by kbase_get_real_power() to invoke the power models.
+ * Must be called with kbdev->ipa.lock held.
+ * This function is only exposed for use by unit tests.
+ */
+int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
+				unsigned long freq,
+				unsigned long voltage);
+#endif /* MALI_UNIT_TEST */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+extern struct devfreq_cooling_ops kbase_ipa_power_model_ops;
+#else
+extern struct devfreq_cooling_power kbase_ipa_power_model_ops;
+#endif
+
+#else /* !(defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
+
+static inline void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev)
+{ }
+
+#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
+
+#endif
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
new file mode 100644
index 0000000000000..071a53080bae5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
@@ -0,0 +1,288 @@
+/*
+ *
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+#include "mali_kbase.h"
+#include "mali_kbase_ipa.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0))
+#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE
+#endif
+
+struct kbase_ipa_model_param {
+	char *name;
+	union {
+		void *voidp;
+		s32 *s32p;
+		char *str;
+	} addr;
+	size_t size;
+	enum kbase_ipa_model_param_type type;
+	struct kbase_ipa_model *model;
+	struct list_head link;
+};
+
+static int param_int_get(void *data, u64 *val)
+{
+	struct kbase_ipa_model_param *param = data;
+
+	mutex_lock(&param->model->kbdev->ipa.lock);
+	*(s64 *) val = *param->addr.s32p;
+	mutex_unlock(&param->model->kbdev->ipa.lock);
+
+	return 0;
+}
+
+static int param_int_set(void *data, u64 val)
+{
+	struct kbase_ipa_model_param *param = data;
+	struct kbase_ipa_model *model = param->model;
+	s64 sval = (s64) val;
+	s32 old_val;
+	int err = 0;
+
+	if (sval < S32_MIN || sval > S32_MAX)
+		return -ERANGE;
+
+	mutex_lock(&param->model->kbdev->ipa.lock);
+	old_val = *param->addr.s32p;
+	*param->addr.s32p = val;
+	err = kbase_ipa_model_recalculate(model);
+	if (err < 0)
+		*param->addr.s32p = old_val;
+	mutex_unlock(&param->model->kbdev->ipa.lock);
+
+	return err;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_s32, param_int_get, param_int_set, "%lld\n");
+
+static ssize_t param_string_get(struct file *file, char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct kbase_ipa_model_param *param = file->private_data;
+	ssize_t ret;
+	size_t len;
+
+	mutex_lock(&param->model->kbdev->ipa.lock);
+	len = strnlen(param->addr.str, param->size - 1) + 1;
+	ret = simple_read_from_buffer(user_buf, count, ppos,
+				      param->addr.str, len);
+	mutex_unlock(&param->model->kbdev->ipa.lock);
+
+	return ret;
+}
+
+static ssize_t param_string_set(struct file *file, const char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct kbase_ipa_model_param *param = file->private_data;
+	struct kbase_ipa_model *model = param->model;
+	char *old_str = NULL;
+	ssize_t ret = count;
+	size_t buf_size;
+	int err;
+
+	mutex_lock(&model->kbdev->ipa.lock);
+
+	if (count > param->size) {
+		ret = -EINVAL;
+		goto end;
+	}
+
+	old_str = kstrndup(param->addr.str, param->size, GFP_KERNEL);
+	if (!old_str) {
+		ret = -ENOMEM;
+		goto end;
+	}
+
+	buf_size = min(param->size - 1, count);
+	if (copy_from_user(param->addr.str, user_buf, buf_size)) {
+		ret = -EFAULT;
+		goto end;
+	}
+
+	param->addr.str[buf_size] = '\0';
+
+	err = kbase_ipa_model_recalculate(model);
+	if (err < 0) {
+		ret = err;
+		strlcpy(param->addr.str, old_str, param->size);
+	}
+
+end:
+	kfree(old_str);
+	mutex_unlock(&model->kbdev->ipa.lock);
+
+	return ret;
+}
+
+static const struct file_operations fops_string = {
+	.read = param_string_get,
+	.write = param_string_set,
+	.open = simple_open,
+	.llseek = default_llseek,
+};
+
+int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name,
+			      void *addr, size_t size,
+			      enum kbase_ipa_model_param_type type)
+{
+	struct kbase_ipa_model_param *param;
+
+	param = kzalloc(sizeof(*param), GFP_KERNEL);
+
+	if (!param)
+		return -ENOMEM;
+
+	/* 'name' is stack-allocated for array elements, so copy it into
+	 * heap-allocated storage */
+	param->name = kstrdup(name, GFP_KERNEL);
+
+	if (!param->name) {
+		kfree(param);
+		return -ENOMEM;
+	}
+
+	param->addr.voidp = addr;
+	param->size = size;
+	param->type = type;
+	param->model = model;
+
+	list_add(&param->link, &model->params);
+
+	return 0;
+}
+
+void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_param *param_p, *param_n;
+
+	list_for_each_entry_safe(param_p, param_n, &model->params, link) {
+		list_del(&param_p->link);
+		kfree(param_p->name);
+		kfree(param_p);
+	}
+}
+
+static int current_power_get(void *data, u64 *val)
+{
+	struct kbase_device *kbdev = data;
+	struct devfreq *df = kbdev->devfreq;
+	u32 power;
+
+	kbase_pm_context_active(kbdev);
+	kbase_get_real_power(df, &power,
+		kbdev->current_nominal_freq, (kbdev->current_voltage / 1000));
+	kbase_pm_context_idle(kbdev);
+
+	*val = power;
+
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(current_power, current_power_get, NULL, "%llu\n");
+
+static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model)
+{
+	struct list_head *it;
+	struct dentry *dir;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	dir = debugfs_create_dir(model->ops->name,
+				 model->kbdev->mali_debugfs_directory);
+
+	if (!dir) {
+		dev_err(model->kbdev->dev,
+			"Couldn't create mali debugfs %s directory",
+			model->ops->name);
+		return;
+	}
+
+	list_for_each(it, &model->params) {
+		struct kbase_ipa_model_param *param =
+				list_entry(it,
+					   struct kbase_ipa_model_param,
+					   link);
+		const struct file_operations *fops = NULL;
+
+		switch (param->type) {
+		case PARAM_TYPE_S32:
+			fops = &fops_s32;
+			break;
+		case PARAM_TYPE_STRING:
+			fops = &fops_string;
+			break;
+		}
+
+		if (unlikely(!fops)) {
+			dev_err(model->kbdev->dev,
+				"Type not set for %s parameter %s\n",
+				model->ops->name, param->name);
+		} else {
+			debugfs_create_file(param->name, S_IRUGO | S_IWUSR,
+					    dir, param, fops);
+		}
+	}
+}
+
+void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model,
+	const char *name, s32 val)
+{
+	struct kbase_ipa_model_param *param;
+
+	mutex_lock(&model->kbdev->ipa.lock);
+
+	list_for_each_entry(param, &model->params, link) {
+		if (!strcmp(param->name, name)) {
+			if (param->type == PARAM_TYPE_S32) {
+				*param->addr.s32p = val;
+			} else {
+				dev_err(model->kbdev->dev,
+					"Wrong type for %s parameter %s\n",
+					model->ops->name, param->name);
+			}
+			break;
+		}
+	}
+
+	mutex_unlock(&model->kbdev->ipa.lock);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_model_param_set_s32);
+
+void kbase_ipa_debugfs_init(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->ipa.lock);
+
+	if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model)
+		kbase_ipa_model_debugfs_init(kbdev->ipa.configured_model);
+	kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model);
+
+	debugfs_create_file("ipa_current_power", 0444,
+			kbdev->mali_debugfs_directory, kbdev, &current_power);
+
+	mutex_unlock(&kbdev->ipa.lock);
+}
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h
new file mode 100644
index 0000000000000..a983d9c142166
--- /dev/null
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h
@@ -0,0 +1,68 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_IPA_DEBUGFS_H_
+#define _KBASE_IPA_DEBUGFS_H_
+
+enum kbase_ipa_model_param_type {
+	PARAM_TYPE_S32 = 1,
+	PARAM_TYPE_STRING,
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+void kbase_ipa_debugfs_init(struct kbase_device *kbdev);
+int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name,
+			      void *addr, size_t size,
+			      enum kbase_ipa_model_param_type type);
+void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model);
+
+/**
+ * kbase_ipa_model_param_set_s32 - Set an integer model parameter
+ *
+ * @model:	pointer to IPA model
+ * @name:	name of corresponding debugfs entry
+ * @val:	new value of the parameter
+ *
+ * This function is only exposed for use by unit tests running in
+ * kernel space. Normally it is expected that parameter values will
+ * instead be set via debugfs.
+ */
+void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model,
+	const char *name, s32 val);
+
+#else /* CONFIG_DEBUG_FS */
+
+static inline int kbase_ipa_model_param_add(struct kbase_ipa_model *model,
+					    const char *name, void *addr,
+					    size_t size,
+					    enum kbase_ipa_model_param_type type)
+{
+	return 0;
+}
+
+static inline void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model)
+{ }
+
+#endif /* CONFIG_DEBUG_FS */
+
+#endif /* _KBASE_IPA_DEBUGFS_H_ */
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c
new file mode 100644
index 0000000000000..e684df4a6662c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c
@@ -0,0 +1,350 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <uapi/linux/thermal.h>
+#include <linux/thermal.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+#include <linux/of.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+
+#include "mali_kbase.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_ipa_simple.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+#if MALI_UNIT_TEST
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+static unsigned long dummy_temp;
+
+static int kbase_simple_power_model_get_dummy_temp(
+	struct thermal_zone_device *tz,
+	unsigned long *temp)
+{
+	*temp = READ_ONCE(dummy_temp);
+	return 0;
+}
+
+#else
+static int dummy_temp;
+
+static int kbase_simple_power_model_get_dummy_temp(
+	struct thermal_zone_device *tz,
+	int *temp)
+{
+	*temp = READ_ONCE(dummy_temp);
+	return 0;
+}
+#endif
+
+/* Intercept calls to the kernel function using a macro */
+#ifdef thermal_zone_get_temp
+#undef thermal_zone_get_temp
+#endif
+#define thermal_zone_get_temp(tz, temp) \
+	kbase_simple_power_model_get_dummy_temp(tz, temp)
+
+void kbase_simple_power_model_set_dummy_temp(int temp)
+{
+	WRITE_ONCE(dummy_temp, temp);
+}
+KBASE_EXPORT_TEST_API(kbase_simple_power_model_set_dummy_temp);
+
+#endif /* MALI_UNIT_TEST */
+
+/*
+ * This model is primarily designed for the Juno platform. It may not be
+ * suitable for other platforms. The additional resources in this model
+ * should preferably be minimal, as this model is rarely used when a dynamic
+ * model is available.
+ */
+
+/**
+ * struct kbase_ipa_model_simple_data - IPA context per device
+ * @dynamic_coefficient: dynamic coefficient of the model
+ * @static_coefficient:  static coefficient of the model
+ * @ts:                  Thermal scaling coefficients of the model
+ * @tz_name:             Thermal zone name
+ * @gpu_tz:              thermal zone device
+ * @poll_temperature_thread: Handle for temperature polling thread
+ * @current_temperature: Most recent value of polled temperature
+ * @temperature_poll_interval_ms: How often temperature should be checked, in ms
+ */
+
+struct kbase_ipa_model_simple_data {
+	u32 dynamic_coefficient;
+	u32 static_coefficient;
+	s32 ts[4];
+	char tz_name[THERMAL_NAME_LENGTH];
+	struct thermal_zone_device *gpu_tz;
+	struct task_struct *poll_temperature_thread;
+	int current_temperature;
+	int temperature_poll_interval_ms;
+};
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+/**
+ * calculate_temp_scaling_factor() - Calculate temperature scaling coefficient
+ * @ts:		Signed coefficients, in order t^0 to t^3, with units Deg^-N
+ * @t:		Temperature, in mDeg C. Range: -2^17 < t < 2^17
+ *
+ * Scale the temperature according to a cubic polynomial whose coefficients are
+ * provided in the device tree. The result is used to scale the static power
+ * coefficient, where 1000000 means no change.
+ *
+ * Return: Temperature scaling factor. Range 0 <= ret <= 10,000,000.
+ */
+static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t)
+{
+	/* Range: -2^24 < t2 < 2^24 m(Deg^2) */
+	const s64 t2 = div_s64((t * t), 1000);
+
+	/* Range: -2^31 < t3 < 2^31 m(Deg^3) */
+	const s64 t3 = div_s64((t * t2), 1000);
+
+	/*
+	 * Sum the parts. t^[1-3] are in m(Deg^N), but the coefficients are in
+	 * Deg^-N, so we need to multiply the last coefficient by 1000.
+	 * Range: -2^63 < res_big < 2^63
+	 */
+	const s64 res_big = ts[3] * t3    /* +/- 2^62 */
+			  + ts[2] * t2    /* +/- 2^55 */
+			  + ts[1] * t     /* +/- 2^48 */
+			  + ts[0] * 1000; /* +/- 2^41 */
+
+	/* Range: -2^60 < res_unclamped < 2^60 */
+	s64 res_unclamped = div_s64(res_big, 1000);
+
+	/* Clamp to range of 0x to 10x the static power */
+	return clamp(res_unclamped, (s64) 0, (s64) 10000000);
+}
+
+/* We can't call thermal_zone_get_temp() directly in model_static_coeff(),
+ * because we don't know if tz->lock is held in the same thread. So poll it in
+ * a separate thread to get around this. */
+static int poll_temperature(void *data)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *) data;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+	unsigned long temp;
+#else
+	int temp;
+#endif
+
+	while (!kthread_should_stop()) {
+		struct thermal_zone_device *tz = READ_ONCE(model_data->gpu_tz);
+
+		if (tz) {
+			int ret;
+
+			ret = thermal_zone_get_temp(tz, &temp);
+			if (ret) {
+				pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+						    ret);
+				temp = FALLBACK_STATIC_TEMPERATURE;
+			}
+		} else {
+			temp = FALLBACK_STATIC_TEMPERATURE;
+		}
+
+		WRITE_ONCE(model_data->current_temperature, temp);
+
+		msleep_interruptible(READ_ONCE(model_data->temperature_poll_interval_ms));
+	}
+
+	return 0;
+}
+
+static int model_static_coeff(struct kbase_ipa_model *model, u32 *coeffp)
+{
+	u32 temp_scaling_factor;
+	struct kbase_ipa_model_simple_data *model_data =
+		(struct kbase_ipa_model_simple_data *) model->model_data;
+	u64 coeff_big;
+	int temp;
+
+	temp = READ_ONCE(model_data->current_temperature);
+
+	/* Range: 0 <= temp_scaling_factor < 2^24 */
+	temp_scaling_factor = calculate_temp_scaling_factor(model_data->ts,
+							    temp);
+
+	/*
+	 * Range: 0 <= coeff_big < 2^52 to avoid overflowing *coeffp. This
+	 * means static_coefficient must be in range
+	 * 0 <= static_coefficient < 2^28.
+	 */
+	coeff_big = (u64) model_data->static_coefficient * (u64) temp_scaling_factor;
+	*coeffp = div_u64(coeff_big, 1000000);
+
+	return 0;
+}
+
+static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+		(struct kbase_ipa_model_simple_data *) model->model_data;
+
+	*coeffp = model_data->dynamic_coefficient;
+
+	return 0;
+}
+
+static int add_params(struct kbase_ipa_model *model)
+{
+	int err = 0;
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *)model->model_data;
+
+	err = kbase_ipa_model_add_param_s32(model, "static-coefficient",
+					    &model_data->static_coefficient,
+					    1, true);
+	if (err)
+		goto end;
+
+	err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient",
+					    &model_data->dynamic_coefficient,
+					    1, true);
+	if (err)
+		goto end;
+
+	err = kbase_ipa_model_add_param_s32(model, "ts",
+					    model_data->ts, 4, true);
+	if (err)
+		goto end;
+
+	err = kbase_ipa_model_add_param_string(model, "thermal-zone",
+					       model_data->tz_name,
+					       sizeof(model_data->tz_name), true);
+	if (err)
+		goto end;
+
+	model_data->temperature_poll_interval_ms = 200;
+	err = kbase_ipa_model_add_param_s32(model, "temp-poll-interval-ms",
+					    &model_data->temperature_poll_interval_ms,
+					    1, false);
+
+end:
+	return err;
+}
+
+static int kbase_simple_power_model_init(struct kbase_ipa_model *model)
+{
+	int err;
+	struct kbase_ipa_model_simple_data *model_data;
+
+	model_data = kzalloc(sizeof(struct kbase_ipa_model_simple_data),
+			     GFP_KERNEL);
+	if (!model_data)
+		return -ENOMEM;
+
+	model->model_data = (void *) model_data;
+
+	model_data->current_temperature = FALLBACK_STATIC_TEMPERATURE;
+	model_data->poll_temperature_thread = kthread_run(poll_temperature,
+							  (void *) model_data,
+							  "mali-simple-power-model-temp-poll");
+	if (IS_ERR(model_data->poll_temperature_thread)) {
+		kfree(model_data);
+		return PTR_ERR(model_data->poll_temperature_thread);
+	}
+
+	err = add_params(model);
+	if (err) {
+		kbase_ipa_model_param_free_all(model);
+		kthread_stop(model_data->poll_temperature_thread);
+		kfree(model_data);
+	}
+
+	return err;
+}
+
+static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *)model->model_data;
+	struct thermal_zone_device *tz;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	if (!strnlen(model_data->tz_name, sizeof(model_data->tz_name))) {
+		model_data->gpu_tz = NULL;
+	} else {
+		char tz_name[THERMAL_NAME_LENGTH];
+
+		strlcpy(tz_name, model_data->tz_name, sizeof(tz_name));
+
+		/* Release ipa.lock so that thermal_list_lock is not acquired
+		 * with ipa.lock held, thereby avoid lock ordering violation
+		 * lockdep warning. The warning comes as a chain of locks
+		 * ipa.lock --> thermal_list_lock --> tz->lock gets formed
+		 * on registering devfreq cooling device when probe method
+		 * of mali platform driver is invoked.
+		 */
+		mutex_unlock(&model->kbdev->ipa.lock);
+		tz = thermal_zone_get_zone_by_name(tz_name);
+		mutex_lock(&model->kbdev->ipa.lock);
+
+		if (IS_ERR_OR_NULL(tz)) {
+			pr_warn_ratelimited("Error %ld getting thermal zone \'%s\', not yet ready?\n",
+					    PTR_ERR(tz), tz_name);
+			return -EPROBE_DEFER;
+		}
+
+		/* Check if another thread raced against us & updated the
+		 * thermal zone name string. Update the gpu_tz pointer only if
+		 * the name string did not change whilst we retrieved the new
+		 * thermal_zone_device pointer, otherwise model_data->tz_name &
+		 * model_data->gpu_tz would become inconsistent with each other.
+		 * The below check will succeed only for the thread which last
+		 * updated the name string.
+		 */
+		if (strncmp(tz_name, model_data->tz_name, sizeof(tz_name)) == 0)
+			model_data->gpu_tz = tz;
+	}
+
+	return 0;
+}
+
+static void kbase_simple_power_model_term(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_simple_data *model_data =
+			(struct kbase_ipa_model_simple_data *)model->model_data;
+
+	kthread_stop(model_data->poll_temperature_thread);
+
+	kfree(model_data);
+}
+
+struct kbase_ipa_model_ops kbase_simple_ipa_model_ops = {
+		.name = "mali-simple-power-model",
+		.init = &kbase_simple_power_model_init,
+		.recalculate = &kbase_simple_power_model_recalculate,
+		.term = &kbase_simple_power_model_term,
+		.get_dynamic_coeff = &model_dynamic_coeff,
+		.get_static_coeff = &model_static_coeff,
+};
+KBASE_EXPORT_TEST_API(kbase_simple_ipa_model_ops);
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h
new file mode 100644
index 0000000000000..fed67d527c7c6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_IPA_SIMPLE_H_
+#define _KBASE_IPA_SIMPLE_H_
+
+#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)
+
+extern struct kbase_ipa_model_ops kbase_simple_ipa_model_ops;
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_simple_power_model_set_dummy_temp() - set a dummy temperature value
+ * @temp: Temperature of the thermal zone, in millidegrees celsius.
+ *
+ * This is only intended for use in unit tests, to ensure that the temperature
+ * values used by the simple power model are predictable. Deterministic
+ * behavior is necessary to allow validation of the static power values
+ * computed by this model.
+ */
+void kbase_simple_power_model_set_dummy_temp(int temp);
+#endif /* MALI_UNIT_TEST */
+
+#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
+
+#endif /* _KBASE_IPA_SIMPLE_H_ */
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
new file mode 100644
index 0000000000000..699252d0735b3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
@@ -0,0 +1,387 @@
+/*
+ *
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_ipa_vinstr_common.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+#define DEFAULT_SCALING_FACTOR 5
+
+/* If the value of GPU_ACTIVE is below this, use the simple model
+ * instead, to avoid extrapolating small amounts of counter data across
+ * large sample periods.
+ */
+#define DEFAULT_MIN_SAMPLE_CYCLES 10000
+
+/**
+ * read_hwcnt() - read a counter value
+ * @model_data:		pointer to model data
+ * @offset:		offset, in bytes, into vinstr buffer
+ *
+ * Return: A 32-bit counter value. Range: 0 < value < 2^27 (worst case would be
+ * incrementing every cycle over a ~100ms sample period at a high frequency,
+ * e.g. 1 GHz: 2^30 * 0.1seconds ~= 2^27.
+ */
+static inline u32 kbase_ipa_read_hwcnt(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	u32 offset)
+{
+	u8 *p = model_data->vinstr_buffer;
+
+	return *(u32 *)&p[offset];
+}
+
+static inline s64 kbase_ipa_add_saturate(s64 a, s64 b)
+{
+	if (S64_MAX - a < b)
+		return S64_MAX;
+	return a + b;
+}
+
+s64 kbase_ipa_sum_all_shader_cores(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+	u64 core_mask;
+	u32 base = 0;
+	s64 ret = 0;
+
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (core_mask != 0ull) {
+		if ((core_mask & 1ull) != 0ull) {
+			/* 0 < counter_value < 2^27 */
+			u32 counter_value = kbase_ipa_read_hwcnt(model_data,
+						       base + counter);
+
+			/* 0 < ret < 2^27 * max_num_cores = 2^32 */
+			ret = kbase_ipa_add_saturate(ret, counter_value);
+		}
+		base += KBASE_IPA_NR_BYTES_PER_BLOCK;
+		core_mask >>= 1;
+	}
+
+	/* Range: -2^54 < ret * coeff < 2^54 */
+	return ret * coeff;
+}
+
+s64 kbase_ipa_sum_all_memsys_blocks(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+	const u32 num_blocks = kbdev->gpu_props.props.l2_props.num_l2_slices;
+	u32 base = 0;
+	s64 ret = 0;
+	u32 i;
+
+	for (i = 0; i < num_blocks; i++) {
+		/* 0 < counter_value < 2^27 */
+		u32 counter_value = kbase_ipa_read_hwcnt(model_data,
+					       base + counter);
+
+		/* 0 < ret < 2^27 * max_num_memsys_blocks = 2^29 */
+		ret = kbase_ipa_add_saturate(ret, counter_value);
+		base += KBASE_IPA_NR_BYTES_PER_BLOCK;
+	}
+
+	/* Range: -2^51 < ret * coeff < 2^51 */
+	return ret * coeff;
+}
+
+s64 kbase_ipa_single_counter(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter)
+{
+	/* Range: 0 < counter_value < 2^27 */
+	const u32 counter_value = kbase_ipa_read_hwcnt(model_data, counter);
+
+	/* Range: -2^49 < ret < 2^49 */
+	return counter_value * (s64) coeff;
+}
+
+/**
+ * kbase_ipa_gpu_active - Inform IPA that GPU is now active
+ * @model_data: Pointer to model data
+ *
+ * This function may cause vinstr to become active.
+ */
+static void kbase_ipa_gpu_active(struct kbase_ipa_model_vinstr_data *model_data)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (!kbdev->ipa.vinstr_active) {
+		kbdev->ipa.vinstr_active = true;
+		kbase_vinstr_resume_client(model_data->vinstr_cli);
+	}
+}
+
+/**
+ * kbase_ipa_gpu_idle - Inform IPA that GPU is now idle
+ * @model_data: Pointer to model data
+ *
+ * This function may cause vinstr to become idle.
+ */
+static void kbase_ipa_gpu_idle(struct kbase_ipa_model_vinstr_data *model_data)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	if (kbdev->ipa.vinstr_active) {
+		kbase_vinstr_suspend_client(model_data->vinstr_cli);
+		kbdev->ipa.vinstr_active = false;
+	}
+}
+
+int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+	struct kbase_ioctl_hwcnt_reader_setup setup;
+	size_t dump_size;
+
+	dump_size = kbase_vinstr_dump_size(kbdev);
+	model_data->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!model_data->vinstr_buffer) {
+		dev_err(kbdev->dev, "Failed to allocate IPA dump buffer");
+		return -1;
+	}
+
+	setup.jm_bm = ~0u;
+	setup.shader_bm = ~0u;
+	setup.tiler_bm = ~0u;
+	setup.mmu_l2_bm = ~0u;
+	model_data->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(kbdev->vinstr_ctx,
+			&setup, model_data->vinstr_buffer);
+	if (!model_data->vinstr_cli) {
+		dev_err(kbdev->dev, "Failed to register IPA with vinstr core");
+		kfree(model_data->vinstr_buffer);
+		model_data->vinstr_buffer = NULL;
+		return -1;
+	}
+
+	kbase_vinstr_hwc_clear(model_data->vinstr_cli);
+
+	kbdev->ipa.gpu_active_callback = kbase_ipa_gpu_active;
+	kbdev->ipa.gpu_idle_callback = kbase_ipa_gpu_idle;
+	kbdev->ipa.model_data = model_data;
+	kbdev->ipa.vinstr_active = false;
+	/* Suspend vinstr, to ensure that the GPU is powered off until there is
+	 * something to execute.
+	 */
+	kbase_vinstr_suspend_client(model_data->vinstr_cli);
+
+	return 0;
+}
+
+void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+
+	kbdev->ipa.gpu_active_callback = NULL;
+	kbdev->ipa.gpu_idle_callback = NULL;
+	kbdev->ipa.model_data = NULL;
+	kbdev->ipa.vinstr_active = false;
+
+	if (model_data->vinstr_cli)
+		kbase_vinstr_detach_client(model_data->vinstr_cli);
+
+	model_data->vinstr_cli = NULL;
+	kfree(model_data->vinstr_buffer);
+	model_data->vinstr_buffer = NULL;
+}
+
+int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
+{
+	struct kbase_ipa_model_vinstr_data *model_data =
+			(struct kbase_ipa_model_vinstr_data *)model->model_data;
+	struct kbase_device *kbdev = model_data->kbdev;
+	s64 energy = 0;
+	size_t i;
+	u64 coeff = 0, coeff_mul = 0;
+	u32 active_cycles;
+	int err = 0;
+
+	if (!kbdev->ipa.vinstr_active) {
+		err = -ENODATA;
+		goto err0; /* GPU powered off - no counters to collect */
+	}
+
+	err = kbase_vinstr_hwc_dump(model_data->vinstr_cli,
+				    BASE_HWCNT_READER_EVENT_MANUAL);
+	if (err)
+		goto err0;
+
+	/* Range: 0 (GPU not used at all), to the max sampling interval, say
+	 * 1s, * max GPU frequency (GPU 100% utilized).
+	 * 0 <= active_cycles <= 1 * ~2GHz
+	 * 0 <= active_cycles < 2^31
+	 */
+	active_cycles = model_data->get_active_cycles(model_data);
+
+	if (active_cycles < (u32) max(model_data->min_sample_cycles, 0)) {
+		err = -ENODATA;
+		goto err0;
+	}
+
+	/* Range: 1 <= active_cycles < 2^31 */
+	active_cycles = max(1u, active_cycles);
+
+	/* Range of 'energy' is +/- 2^54 * number of IPA groups (~8), so around
+	 * -2^57 < energy < 2^57
+	 */
+	for (i = 0; i < model_data->groups_def_num; i++) {
+		const struct kbase_ipa_group *group = &model_data->groups_def[i];
+		s32 coeff = model_data->group_values[i];
+		s64 group_energy = group->op(model_data, coeff,
+					     group->counter_block_offset);
+
+		energy = kbase_ipa_add_saturate(energy, group_energy);
+	}
+
+	/* Range: 0 <= coeff < 2^57 */
+	if (energy > 0)
+		coeff = energy;
+
+	/* Range: 0 <= coeff < 2^57 (because active_cycles >= 1). However, this
+	 * can be constrained further: Counter values can only be increased by
+	 * a theoretical maximum of about 64k per clock cycle. Beyond this,
+	 * we'd have to sample every 1ms to avoid them overflowing at the
+	 * lowest clock frequency (say 100MHz). Therefore, we can write the
+	 * range of 'coeff' in terms of active_cycles:
+	 *
+	 * coeff = SUM(coeffN * counterN * num_cores_for_counterN)
+	 * coeff <= SUM(coeffN * counterN) * max_num_cores
+	 * coeff <= num_IPA_groups * max_coeff * max_counter * max_num_cores
+	 *       (substitute max_counter = 2^16 * active_cycles)
+	 * coeff <= num_IPA_groups * max_coeff * 2^16 * active_cycles * max_num_cores
+	 * coeff <=    2^3         *    2^22   * 2^16 * active_cycles * 2^5
+	 * coeff <= 2^46 * active_cycles
+	 *
+	 * So after the division: 0 <= coeff <= 2^46
+	 */
+	coeff = div_u64(coeff, active_cycles);
+
+	/* Not all models were derived at the same reference voltage. Voltage
+	 * scaling is done by multiplying by V^2, so we need to *divide* by
+	 * Vref^2 here.
+	 * Range: 0 <= coeff <= 2^49
+	 */
+	coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
+	/* Range: 0 <= coeff <= 2^52 */
+	coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
+
+	/* Scale by user-specified integer factor.
+	 * Range: 0 <= coeff_mul < 2^57
+	 */
+	coeff_mul = coeff * model_data->scaling_factor;
+
+	/* The power models have results with units
+	 * mW/(MHz V^2), i.e. nW/(Hz V^2). With precision of 1/1000000, this
+	 * becomes fW/(Hz V^2), which are the units of coeff_mul. However,
+	 * kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide
+	 * by 1000.
+	 * Range: 0 <= coeff_mul < 2^47
+	 */
+	coeff_mul = div_u64(coeff_mul, 1000u);
+
+err0:
+	/* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */
+	*coeffp = clamp(coeff_mul, (u64) 0, (u64) 1 << 16);
+	return err;
+}
+
+int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
+				       const struct kbase_ipa_group *ipa_groups_def,
+				       size_t ipa_group_size,
+				       kbase_ipa_get_active_cycles_callback get_active_cycles,
+				       s32 reference_voltage)
+{
+	int err = 0;
+	size_t i;
+	struct kbase_ipa_model_vinstr_data *model_data;
+
+	if (!model || !ipa_groups_def || !ipa_group_size || !get_active_cycles)
+		return -EINVAL;
+
+	model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
+	if (!model_data)
+		return -ENOMEM;
+
+	model_data->kbdev = model->kbdev;
+	model_data->groups_def = ipa_groups_def;
+	model_data->groups_def_num = ipa_group_size;
+	model_data->get_active_cycles = get_active_cycles;
+
+	model->model_data = (void *) model_data;
+
+	for (i = 0; i < model_data->groups_def_num; ++i) {
+		const struct kbase_ipa_group *group = &model_data->groups_def[i];
+
+		model_data->group_values[i] = group->default_value;
+		err = kbase_ipa_model_add_param_s32(model, group->name,
+					&model_data->group_values[i],
+					1, false);
+		if (err)
+			goto exit;
+	}
+
+	model_data->scaling_factor = DEFAULT_SCALING_FACTOR;
+	err = kbase_ipa_model_add_param_s32(model, "scale",
+					    &model_data->scaling_factor,
+					    1, false);
+	if (err)
+		goto exit;
+
+	model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES;
+	err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles",
+					    &model_data->min_sample_cycles,
+					    1, false);
+	if (err)
+		goto exit;
+
+	model_data->reference_voltage = reference_voltage;
+	err = kbase_ipa_model_add_param_s32(model, "reference_voltage",
+					    &model_data->reference_voltage,
+					    1, false);
+	if (err)
+		goto exit;
+
+	err = kbase_ipa_attach_vinstr(model_data);
+
+exit:
+	if (err) {
+		kbase_ipa_model_param_free_all(model);
+		kfree(model_data);
+	}
+	return err;
+}
+
+void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_vinstr_data *model_data =
+			(struct kbase_ipa_model_vinstr_data *)model->model_data;
+
+	kbase_ipa_detach_vinstr(model_data);
+	kfree(model_data);
+}
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
new file mode 100644
index 0000000000000..0deafaebbc72c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
@@ -0,0 +1,215 @@
+/*
+ *
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_IPA_VINSTR_COMMON_H_
+#define _KBASE_IPA_VINSTR_COMMON_H_
+
+#include "mali_kbase.h"
+
+/* Maximum number of IPA groups for an IPA model. */
+#define KBASE_IPA_MAX_GROUP_DEF_NUM  16
+
+/* Number of bytes per hardware counter in a vinstr_buffer. */
+#define KBASE_IPA_NR_BYTES_PER_CNT    4
+
+/* Number of hardware counters per block in a vinstr_buffer. */
+#define KBASE_IPA_NR_CNT_PER_BLOCK   64
+
+/* Number of bytes per block in a vinstr_buffer. */
+#define KBASE_IPA_NR_BYTES_PER_BLOCK \
+	(KBASE_IPA_NR_CNT_PER_BLOCK * KBASE_IPA_NR_BYTES_PER_CNT)
+
+struct kbase_ipa_model_vinstr_data;
+
+typedef u32 (*kbase_ipa_get_active_cycles_callback)(struct kbase_ipa_model_vinstr_data *);
+
+/**
+ * struct kbase_ipa_model_vinstr_data - IPA context per device
+ * @kbdev:               pointer to kbase device
+ * @groups_def:          Array of IPA groups.
+ * @groups_def_num:      Number of elements in the array of IPA groups.
+ * @get_active_cycles:   Callback to return number of active cycles during
+ *                       counter sample period
+ * @vinstr_cli:          vinstr client handle
+ * @vinstr_buffer:       buffer to dump hardware counters onto
+ * @reference_voltage:   voltage, in mV, of the operating point used when
+ *                       deriving the power model coefficients. Range approx
+ *                       0.1V - 5V (~= 8V): 2^7 <= reference_voltage <= 2^13
+ * @scaling_factor:      User-specified power scaling factor. This is an
+ *                       integer, which is multiplied by the power coefficient
+ *                       just before OPP scaling.
+ *                       Range approx 0-32: 0 < scaling_factor < 2^5
+ * @min_sample_cycles:   If the value of the GPU_ACTIVE counter (the number of
+ *                       cycles the GPU was working) is less than
+ *                       min_sample_cycles, the counter model will return an
+ *                       error, causing the IPA framework to approximate using
+ *                       the cached simple model results instead. This may be
+ *                       more accurate than extrapolating  using a very small
+ *                       counter dump.
+ */
+struct kbase_ipa_model_vinstr_data {
+	struct kbase_device *kbdev;
+	s32 group_values[KBASE_IPA_MAX_GROUP_DEF_NUM];
+	const struct kbase_ipa_group *groups_def;
+	size_t groups_def_num;
+	kbase_ipa_get_active_cycles_callback get_active_cycles;
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	s32 reference_voltage;
+	s32 scaling_factor;
+	s32 min_sample_cycles;
+};
+
+/**
+ * struct ipa_group - represents a single IPA group
+ * @name:               name of the IPA group
+ * @default_value:      default value of coefficient for IPA group.
+ *                      Coefficients are interpreted as fractions where the
+ *                      denominator is 1000000.
+ * @op:                 which operation to be performed on the counter values
+ * @counter_block_offset:  block offset in bytes of the counter used to calculate energy for IPA group
+ */
+struct kbase_ipa_group {
+	const char *name;
+	s32 default_value;
+	s64 (*op)(struct kbase_ipa_model_vinstr_data *, s32, u32);
+	u32 counter_block_offset;
+};
+
+/**
+ * kbase_ipa_sum_all_shader_cores() - sum a counter over all cores
+ * @model_data:		pointer to model data
+ * @coeff:		model coefficient. Unity is ~2^20, so range approx
+ *			+/- 4.0: -2^22 < coeff < 2^22
+ * @counter		offset in bytes of the counter used to calculate energy
+ *			for IPA group
+ *
+ * Calculate energy estimation based on hardware counter `counter'
+ * across all shader cores.
+ *
+ * Return: Sum of counter values. Range: -2^54 < ret < 2^54
+ */
+s64 kbase_ipa_sum_all_shader_cores(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter);
+
+/**
+ * kbase_ipa_sum_all_memsys_blocks() - sum a counter over all mem system blocks
+ * @model_data:		pointer to model data
+ * @coeff:		model coefficient. Unity is ~2^20, so range approx
+ *			+/- 4.0: -2^22 < coeff < 2^22
+ * @counter:		offset in bytes of the counter used to calculate energy
+ *			for IPA group
+ *
+ * Calculate energy estimation based on hardware counter `counter' across all
+ * memory system blocks.
+ *
+ * Return: Sum of counter values. Range: -2^51 < ret < 2^51
+ */
+s64 kbase_ipa_sum_all_memsys_blocks(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter);
+
+/**
+ * kbase_ipa_single_counter() - sum a single counter
+ * @model_data:		pointer to model data
+ * @coeff:		model coefficient. Unity is ~2^20, so range approx
+ *			+/- 4.0: -2^22 < coeff < 2^22
+ * @counter:		offset in bytes of the counter used to calculate energy
+ *			for IPA group
+ *
+ * Calculate energy estimation based on hardware counter `counter'.
+ *
+ * Return: Counter value. Range: -2^49 < ret < 2^49
+ */
+s64 kbase_ipa_single_counter(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff, u32 counter);
+
+/**
+ * attach_vinstr() - attach a vinstr_buffer to an IPA model.
+ * @model_data		pointer to model data
+ *
+ * Attach a vinstr_buffer to an IPA model. The vinstr_buffer
+ * allows access to the hardware counters used to calculate
+ * energy consumption.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data);
+
+/**
+ * detach_vinstr() - detach a vinstr_buffer from an IPA model.
+ * @model_data		pointer to model data
+ *
+ * Detach a vinstr_buffer from an IPA model.
+ */
+void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data);
+
+/**
+ * kbase_ipa_vinstr_dynamic_coeff() - calculate dynamic power based on HW counters
+ * @model:		pointer to instantiated model
+ * @coeffp:		pointer to location where calculated power, in
+ *			pW/(Hz V^2), is stored.
+ *
+ * This is a GPU-agnostic implementation of the get_dynamic_coeff()
+ * function of an IPA model. It relies on the model being populated
+ * with GPU-specific attributes at initialization time.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp);
+
+/**
+ * kbase_ipa_vinstr_common_model_init() - initialize ipa power model
+ * @model:		ipa power model to initialize
+ * @ipa_groups_def:	array of ipa groups which sets coefficients for
+ *			the corresponding counters used in the ipa model
+ * @ipa_group_size:     number of elements in the array @ipa_groups_def
+ * @get_active_cycles:  callback to return the number of cycles the GPU was
+ *			active during the counter sample period.
+ * @reference_voltage:  voltage, in mV, of the operating point used when
+ *                      deriving the power model coefficients.
+ *
+ * This initialization function performs initialization steps common
+ * for ipa models based on counter values. In each call, the model
+ * passes its specific coefficient values per ipa counter group via
+ * @ipa_groups_def array.
+ *
+ * Return: 0 on success, error code otherwise
+ */
+int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
+				       const struct kbase_ipa_group *ipa_groups_def,
+				       size_t ipa_group_size,
+				       kbase_ipa_get_active_cycles_callback get_active_cycles,
+				       s32 reference_voltage);
+
+/**
+ * kbase_ipa_vinstr_common_model_term() - terminate ipa power model
+ * @model: ipa power model to terminate
+ *
+ * This function performs all necessary steps to terminate ipa power model
+ * including clean up of resources allocated to hold model data.
+ */
+void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model);
+
+#endif /* _KBASE_IPA_VINSTR_COMMON_H_ */
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c
new file mode 100644
index 0000000000000..83660334d0834
--- /dev/null
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c
@@ -0,0 +1,340 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#include <linux/thermal.h>
+
+#include "mali_kbase_ipa_vinstr_common.h"
+#include "mali_kbase.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+
+/* Performance counter blocks base offsets */
+#define JM_BASE             (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define TILER_BASE          (1 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+#define MEMSYS_BASE         (2 * KBASE_IPA_NR_BYTES_PER_BLOCK)
+
+/* JM counter block offsets */
+#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT *  6)
+
+/* Tiler counter block offsets */
+#define TILER_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 45)
+
+/* MEMSYS counter block offsets */
+#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25)
+
+/* SC counter block offsets */
+#define SC_FRAG_ACTIVE             (KBASE_IPA_NR_BYTES_PER_CNT *  4)
+#define SC_EXEC_CORE_ACTIVE        (KBASE_IPA_NR_BYTES_PER_CNT * 26)
+#define SC_EXEC_INSTR_COUNT        (KBASE_IPA_NR_BYTES_PER_CNT * 28)
+#define SC_TEX_COORD_ISSUE         (KBASE_IPA_NR_BYTES_PER_CNT * 40)
+#define SC_TEX_TFCH_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 42)
+#define SC_VARY_INSTR              (KBASE_IPA_NR_BYTES_PER_CNT * 49)
+#define SC_VARY_SLOT_32            (KBASE_IPA_NR_BYTES_PER_CNT * 50)
+#define SC_VARY_SLOT_16            (KBASE_IPA_NR_BYTES_PER_CNT * 51)
+#define SC_BEATS_RD_LSC            (KBASE_IPA_NR_BYTES_PER_CNT * 56)
+#define SC_BEATS_WR_LSC            (KBASE_IPA_NR_BYTES_PER_CNT * 61)
+#define SC_BEATS_WR_TIB            (KBASE_IPA_NR_BYTES_PER_CNT * 62)
+
+/**
+ * get_jm_counter() - get performance counter offset inside the Job Manager block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the Job Manager block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g7x_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data,
+						u32 counter_block_offset)
+{
+	return JM_BASE + counter_block_offset;
+}
+
+/**
+ * get_memsys_counter() - get performance counter offset inside the Memory System block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Memory System block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data,
+						    u32 counter_block_offset)
+{
+	/* The base address of Memory System performance counters is always the same, although their number
+	 * may vary based on the number of cores. For the moment it's ok to return a constant.
+	 */
+	return MEMSYS_BASE + counter_block_offset;
+}
+
+/**
+ * get_sc_counter() - get performance counter offset inside the Shader Cores block
+ * @model_data:            pointer to GPU model data.
+ * @counter_block_offset:  offset in bytes of the performance counter inside the (first) Shader Cores block.
+ *
+ * Return: Block offset in bytes of the required performance counter.
+ */
+static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data,
+						u32 counter_block_offset)
+{
+	const u32 sc_base = MEMSYS_BASE +
+		(model_data->kbdev->gpu_props.props.l2_props.num_l2_slices *
+		 KBASE_IPA_NR_BYTES_PER_BLOCK);
+
+	return sc_base + counter_block_offset;
+}
+
+/**
+ * memsys_single_counter() - calculate energy for a single Memory System performance counter.
+ * @model_data:   pointer to GPU model data.
+ * @coeff:        default value of coefficient for IPA group.
+ * @offset:       offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a single Memory System performance counter.
+ */
+static s64 kbase_g7x_sum_all_memsys_blocks(
+		struct kbase_ipa_model_vinstr_data *model_data,
+		s32 coeff,
+		u32 offset)
+{
+	u32 counter;
+
+	counter = kbase_g7x_power_model_get_memsys_counter(model_data, offset);
+	return kbase_ipa_sum_all_memsys_blocks(model_data, coeff, counter);
+}
+
+/**
+ * sum_all_shader_cores() - calculate energy for a Shader Cores performance counter for all cores.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a Shader Cores performance counter for all cores.
+ */
+static s64 kbase_g7x_sum_all_shader_cores(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff,
+	u32 counter_block_offset)
+{
+	u32 counter;
+
+	counter = kbase_g7x_power_model_get_sc_counter(model_data,
+						       counter_block_offset);
+	return kbase_ipa_sum_all_shader_cores(model_data, coeff, counter);
+}
+
+/**
+ * jm_single_counter() - calculate energy for a single Job Manager performance counter.
+ * @model_data:            pointer to GPU model data.
+ * @coeff:                 default value of coefficient for IPA group.
+ * @counter_block_offset:  offset in bytes of the counter inside the block it belongs to.
+ *
+ * Return: Energy estimation for a single Job Manager performance counter.
+ */
+static s64 kbase_g7x_jm_single_counter(
+	struct kbase_ipa_model_vinstr_data *model_data,
+	s32 coeff,
+	u32 counter_block_offset)
+{
+	u32 counter;
+
+	counter = kbase_g7x_power_model_get_jm_counter(model_data,
+						     counter_block_offset);
+	return kbase_ipa_single_counter(model_data, coeff, counter);
+}
+
+/**
+ * get_active_cycles() - return the GPU_ACTIVE counter
+ * @model_data:            pointer to GPU model data.
+ *
+ * Return: the number of cycles the GPU was active during the counter sampling
+ * period.
+ */
+static u32 kbase_g7x_get_active_cycles(
+	struct kbase_ipa_model_vinstr_data *model_data)
+{
+	u32 counter = kbase_g7x_power_model_get_jm_counter(model_data, JM_GPU_ACTIVE);
+
+	/* Counters are only 32-bit, so we can safely multiply by 1 then cast
+	 * the 64-bit result back to a u32.
+	 */
+	return kbase_ipa_single_counter(model_data, 1, counter);
+}
+
+/** Table of IPA group definitions.
+ *
+ * For each IPA group, this table defines a function to access the given performance block counter (or counters,
+ * if the operation needs to be iterated on multiple blocks) and calculate energy estimation.
+ */
+
+static const struct kbase_ipa_group ipa_groups_def_g71[] = {
+	{
+		.name = "l2_access",
+		.default_value = 526300,
+		.op = kbase_g7x_sum_all_memsys_blocks,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 301100,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "tex_issue",
+		.default_value = 197400,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_COORD_ISSUE,
+	},
+	{
+		.name = "tile_wb",
+		.default_value = -156400,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_BEATS_WR_TIB,
+	},
+	{
+		.name = "gpu_active",
+		.default_value = 115800,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+};
+
+static const struct kbase_ipa_group ipa_groups_def_g72[] = {
+	{
+		.name = "l2_access",
+		.default_value = 393000,
+		.op = kbase_g7x_sum_all_memsys_blocks,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 227000,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "tex_issue",
+		.default_value = 181900,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_COORD_ISSUE,
+	},
+	{
+		.name = "tile_wb",
+		.default_value = -120200,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_BEATS_WR_TIB,
+	},
+	{
+		.name = "gpu_active",
+		.default_value = 133100,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+};
+
+static const struct kbase_ipa_group ipa_groups_def_tnox[] = {
+	{
+		.name = "gpu_active",
+		.default_value = 122000,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 488900,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "vary_instr",
+		.default_value = 212100,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_VARY_INSTR,
+	},
+	{
+		.name = "tex_tfch_num_operations",
+		.default_value = 288000,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS,
+	},
+	{
+		.name = "l2_access",
+		.default_value = 378100,
+		.op = kbase_g7x_sum_all_memsys_blocks,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+};
+
+static const struct kbase_ipa_group ipa_groups_def_tgox_r1[] = {
+	{
+		.name = "gpu_active",
+		.default_value = 224200,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 384700,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "vary_instr",
+		.default_value = 271900,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_VARY_INSTR,
+	},
+	{
+		.name = "tex_tfch_num_operations",
+		.default_value = 477700,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS,
+	},
+	{
+		.name = "l2_access",
+		.default_value = 551400,
+		.op = kbase_g7x_sum_all_memsys_blocks,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+};
+
+#define STANDARD_POWER_MODEL(gpu, reference_voltage) \
+	static int kbase_ ## gpu ## _power_model_init(\
+			struct kbase_ipa_model *model) \
+	{ \
+		BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def_ ## gpu) > \
+				KBASE_IPA_MAX_GROUP_DEF_NUM); \
+		return kbase_ipa_vinstr_common_model_init(model, \
+				ipa_groups_def_ ## gpu, \
+				ARRAY_SIZE(ipa_groups_def_ ## gpu), \
+				kbase_g7x_get_active_cycles, \
+				(reference_voltage)); \
+	} \
+	struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
+		.name = "mali-" #gpu "-power-model", \
+		.init = kbase_ ## gpu ## _power_model_init, \
+		.term = kbase_ipa_vinstr_common_model_term, \
+		.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
+	}; \
+	KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
+
+STANDARD_POWER_MODEL(g71, 800);
+STANDARD_POWER_MODEL(g72, 800);
+STANDARD_POWER_MODEL(tnox, 800);
+STANDARD_POWER_MODEL(tgox_r1, 1000);
diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
new file mode 100644
index 0000000000000..10da0c58e9ebd
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
@@ -0,0 +1,461 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_FEATURES_H_
+#define _BASE_HWCONFIG_FEATURES_H_
+
+enum base_hw_feature {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_TLS_HASHING,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_generic[] = {
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t60x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t62x[] = {
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t72x[] = {
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t76x[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tFxx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t83x[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t82x[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tMIx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tHEx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tSIx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tDVx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tNOx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_TLS_HASHING,
+	BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tGOx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_TLS_HASHING,
+	BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tKAx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tTRx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tBOx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tEGx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_TLS_HASHING,
+	BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+	BASE_HW_FEATURE_END
+};
+
+#endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
new file mode 100644
index 0000000000000..19ffd6996ba75
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
@@ -0,0 +1,1242 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_ISSUES_H_
+#define _BASE_HWCONFIG_ISSUES_H_
+
+enum base_hw_issue {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8879,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TNOX_1194,
+	BASE_HW_ISSUE_TGOX_R1_1234,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_generic[] = {
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6398,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7144,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8073,
+	BASE_HW_ISSUE_8186,
+	BASE_HW_ISSUE_8215,
+	BASE_HW_ISSUE_8245,
+	BASE_HW_ISSUE_8250,
+	BASE_HW_ISSUE_8260,
+	BASE_HW_ISSUE_8280,
+	BASE_HW_ISSUE_8316,
+	BASE_HW_ISSUE_8381,
+	BASE_HW_ISSUE_8394,
+	BASE_HW_ISSUE_8401,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8443,
+	BASE_HW_ISSUE_8456,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8634,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8791,
+	BASE_HW_ISSUE_8833,
+	BASE_HW_ISSUE_8896,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_8986,
+	BASE_HW_ISSUE_8987,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_9566,
+	BASE_HW_ISSUE_9630,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_10984,
+	BASE_HW_ISSUE_10995,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9418,
+	BASE_HW_ISSUE_9423,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10969,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
+	BASE_HW_ISSUE_6367,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_6787,
+	BASE_HW_ISSUE_7027,
+	BASE_HW_ISSUE_7304,
+	BASE_HW_ISSUE_8408,
+	BASE_HW_ISSUE_8564,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_8975,
+	BASE_HW_ISSUE_9010,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_9510,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10127,
+	BASE_HW_ISSUE_10327,
+	BASE_HW_ISSUE_10410,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10487,
+	BASE_HW_ISSUE_10607,
+	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10676,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10817,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11035,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_10959,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_26,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3542,
+	BASE_HW_ISSUE_T76X_3556,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10684,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_11056,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t72x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10797,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t76x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t60x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_8778,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t62x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_6402,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
+	BASE_HW_ISSUE_10931,
+	BASE_HW_ISSUE_11012,
+	BASE_HW_ISSUE_11020,
+	BASE_HW_ISSUE_11024,
+	BASE_HW_ISSUE_11042,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t86x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t83x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_10883,
+	BASE_HW_ISSUE_10946,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T720_1386,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t82x[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3086,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T83X_817,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8463,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tHEx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tSIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tDVx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TNOX_1194,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tNOx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TNOX_1194,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TGOX_R1_1234,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tGOx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tKAx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tKAx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tTRx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tBOx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tBOx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tEGx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tEGx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_END
+};
+
+#endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h
new file mode 100644
index 0000000000000..cc44ff225fa8e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_kernel.h
@@ -0,0 +1,1786 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base structures shared with the kernel.
+ */
+
+#ifndef _BASE_KERNEL_H_
+#define _BASE_KERNEL_H_
+
+typedef struct base_mem_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_mem_handle;
+
+#include "mali_base_mem_priv.h"
+#include "mali_kbase_profiling_gator_api.h"
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Dependency stuff, keep it private for now. May want to expose it if
+ * we decide to make the number of semaphores a configurable
+ * option.
+ */
+#define BASE_JD_ATOM_COUNT              256
+
+/* Set/reset values for a software event */
+#define BASE_JD_SOFT_EVENT_SET             ((unsigned char)1)
+#define BASE_JD_SOFT_EVENT_RESET           ((unsigned char)0)
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+#if defined CDBG_ASSERT
+#define LOCAL_ASSERT CDBG_ASSERT
+#elif defined KBASE_DEBUG_ASSERT
+#define LOCAL_ASSERT KBASE_DEBUG_ASSERT
+#else
+#error assert macro not defined!
+#endif
+
+#if defined(PAGE_MASK) && defined(PAGE_SHIFT)
+#define LOCAL_PAGE_SHIFT PAGE_SHIFT
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#include <osu/mali_osu.h>
+
+#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_SHIFT OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
+#else
+#error Failed to find page size
+#endif
+#endif
+
+/**
+ * @addtogroup base_user_api User-side Base APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_memory User-side Base Memory APIs
+ * @{
+ */
+
+/**
+ * typedef base_mem_alloc_flags - Memory allocation, access/hint flags.
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * of course invalid (e.g. MEM_PROT_CPU_WR | MEM_HINT_CPU_RD),
+ * which defines a write-only region on the CPU side, which is
+ * heavily read by the CPU...
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
+ */
+typedef u32 base_mem_alloc_flags;
+
+/* Memory allocation, access/hint flags.
+ *
+ * See base_mem_alloc_flags.
+ */
+
+/* IN */
+/* Read access CPU side
+ */
+#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
+
+/* Write access CPU side
+ */
+#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
+
+/* Read access GPU side
+ */
+#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
+
+/* Write access GPU side
+ */
+#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
+
+/* Execute allowed on the GPU side
+ */
+#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
+
+/* Will be permanently mapped in kernel space.
+ * Flag is only allowed on allocations originating from kbase.
+ */
+#define BASE_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
+
+/* The allocation will completely reside within the same 4GB chunk in the GPU
+ * virtual space.
+ * Since this flag is primarily required only for the TLS memory which will
+ * not be used to contain executable code and also not used for Tiler heap,
+ * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags.
+ */
+#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
+
+#define BASE_MEM_RESERVED_BIT_7 ((base_mem_alloc_flags)1 << 7)
+#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
+
+/* Grow backing store on GPU Page Fault
+ */
+#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
+
+/* Page coherence Outer shareable, if available
+ */
+#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
+
+/* Page coherence Inner shareable
+ */
+#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
+
+/* Should be cached on the CPU
+ */
+#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
+
+/* IN/OUT */
+/* Must have same VA on both the GPU and the CPU
+ */
+#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
+
+/* OUT */
+/* Must call mmap to acquire a GPU address for the alloc
+ */
+#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
+
+/* IN */
+/* Page coherence Outer shareable, required.
+ */
+#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
+
+/* Secure memory
+ */
+#define BASE_MEM_SECURE ((base_mem_alloc_flags)1 << 16)
+
+/* Not needed physical memory
+ */
+#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17)
+
+/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the
+ * addresses to be the same
+ */
+#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18)
+
+/**
+ * Bit 19 is reserved.
+ *
+ * Do not remove, use the next unreserved bit for new flags
+ */
+#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19)
+#define BASE_MEM_MAYBE_RESERVED_BIT_19 BASE_MEM_RESERVED_BIT_19
+
+/**
+ * Memory starting from the end of the initial commit is aligned to 'extent'
+ * pages, where 'extent' must be a power of 2 and no more than
+ * BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
+ */
+#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20)
+
+/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu mode.
+ * Some components within the GPU might only be able to access memory that is
+ * GPU cacheable. Refer to the specific GPU implementation for more details.
+ * The 3 shareability flags will be ignored for GPU uncached memory.
+ * If used while importing USER_BUFFER type memory, then the import will fail
+ * if the memory is not aligned to GPU and CPU cache line width.
+ */
+#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
+
+/* Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 22
+
+/* A mask for all output bits, excluding IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/* A mask for all input bits, including IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+	(((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+/* A mask for all the flags which are modifiable via the base_mem_set_flags
+ * interface.
+ */
+#define BASE_MEM_FLAGS_MODIFIABLE \
+	(BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \
+	 BASE_MEM_COHERENT_LOCAL)
+
+
+/* A mask of all currently reserved flags
+ */
+#define BASE_MEM_FLAGS_RESERVED \
+	(BASE_MEM_RESERVED_BIT_7 | BASE_MEM_RESERVED_BIT_8 | \
+		BASE_MEM_MAYBE_RESERVED_BIT_19)
+
+/* A mask of all the flags which are only valid for allocations within kbase,
+ * and may not be passed from user space.
+ */
+#define BASE_MEM_FLAGS_KERNEL_ONLY (BASE_MEM_PERMANENT_KERNEL_MAPPING)
+
+/* A mask of all the flags that can be returned via the base_mem_get_flags()
+ * interface.
+ */
+#define BASE_MEM_FLAGS_QUERYABLE \
+	(BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \
+		BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \
+		BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED | \
+		BASE_MEM_FLAGS_KERNEL_ONLY))
+
+/**
+ * enum base_mem_import_type - Memory types supported by @a base_mem_import
+ *
+ * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
+ * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
+ * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
+ * base_mem_import_user_buffer
+ *
+ * Each type defines what the supported handle type is.
+ *
+ * If any new type is added here ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+typedef enum base_mem_import_type {
+	BASE_MEM_IMPORT_TYPE_INVALID = 0,
+	/**
+	 * Import type with value 1 is deprecated.
+	 */
+	BASE_MEM_IMPORT_TYPE_UMM = 2,
+	BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
+} base_mem_import_type;
+
+/**
+ * struct base_mem_import_user_buffer - Handle of an imported user buffer
+ *
+ * @ptr:	address of imported user buffer
+ * @length:	length of imported user buffer in bytes
+ *
+ * This structure is used to represent a handle of an imported user buffer.
+ */
+
+struct base_mem_import_user_buffer {
+	u64 ptr;
+	u64 length;
+};
+
+/**
+ * @brief Invalid memory handle.
+ *
+ * Return value from functions returning @ref base_mem_handle on error.
+ *
+ * @warning @ref base_mem_handle_new_invalid must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} })
+
+/**
+ * @brief Special write-alloc memory handle.
+ *
+ * A special handle is used to represent a region where a special page is mapped
+ * with a write-alloc cache setup, typically used when the write result of the
+ * GPU isn't needed, but the GPU must write anyway.
+ *
+ * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} })
+
+#define BASEP_MEM_INVALID_HANDLE               (0ull  << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE               (1ull  << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE           (2ull  << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE           (3ull  << 12)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE     (4ull  << 12)
+/* reserved handles ..-48<<PAGE_SHIFT> for future special handles */
+#define BASE_MEM_COOKIE_BASE                   (64ul  << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS            ((BITS_PER_LONG << 12) + \
+						BASE_MEM_COOKIE_BASE)
+
+/* Mask to detect 4GB boundary alignment */
+#define BASE_MEM_MASK_4GB  0xfffff000UL
+/* Mask to detect 4GB boundary (in page units) alignment */
+#define BASE_MEM_PFN_MASK_4GB  (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT)
+
+/**
+ * Limit on the 'extent' parameter for an allocation with the
+ * BASE_MEM_TILER_ALIGN_TOP flag set
+ *
+ * This is the same as the maximum limit for a Buffer Descriptor's chunk size
+ */
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2 \
+		(21u - (LOCAL_PAGE_SHIFT))
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES \
+		(1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2))
+
+/* Bit mask of cookies used for for memory allocation setup */
+#define KBASE_COOKIE_MASK  ~1UL /* bit 0 is reserved */
+
+/* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */
+#define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */
+
+
+/**
+ * @brief Result codes of changing the size of the backing store allocated to a tmem region
+ */
+typedef enum base_backing_threshold_status {
+	BASE_BACKING_THRESHOLD_OK = 0,			    /**< Resize successful */
+	BASE_BACKING_THRESHOLD_ERROR_OOM = -2,		    /**< Increase failed due to an out-of-memory condition */
+	BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */
+} base_backing_threshold_status;
+
+/**
+ * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs
+ * @{
+ */
+
+/**
+ * @brief a basic memory operation (sync-set).
+ *
+ * The content of this structure is private, and should only be used
+ * by the accessors.
+ */
+typedef struct base_syncset {
+	struct basep_syncset basep_sset;
+} base_syncset;
+
+/** @} end group base_user_api_memory_defered */
+
+/**
+ * Handle to represent imported memory object.
+ * Simple opague handle to imported memory, can't be used
+ * with anything but base_external_resource_init to bind to an atom.
+ */
+typedef struct base_import_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_import_handle;
+
+/** @} end group base_user_api_memory */
+
+/**
+ * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs
+ * @{
+ */
+
+typedef int platform_fence_type;
+#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1)
+
+/**
+ * Base stream handle.
+ *
+ * References an underlying base stream object.
+ */
+typedef struct base_stream {
+	struct {
+		int fd;
+	} basep;
+} base_stream;
+
+/**
+ * Base fence handle.
+ *
+ * References an underlying base fence object.
+ */
+typedef struct base_fence {
+	struct {
+		int fd;
+		int stream_fd;
+	} basep;
+} base_fence;
+
+/**
+ * @brief Per-job data
+ *
+ * This structure is used to store per-job data, and is completely unused
+ * by the Base driver. It can be used to store things such as callback
+ * function pointer, data to handle job completion. It is guaranteed to be
+ * untouched by the Base driver.
+ */
+typedef struct base_jd_udata {
+	u64 blob[2];	 /**< per-job data array */
+} base_jd_udata;
+
+/**
+ * @brief Memory aliasing info
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ *
+ * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset Offset within the handle to start aliasing from, in pages.
+ *         Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ *         specifies the number of times the special page is needed.
+ */
+struct base_mem_aliasing_info {
+	base_mem_handle handle;
+	u64 offset;
+	u64 length;
+};
+
+/**
+ * Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
+ * initial commit is aligned to 'extent' pages, where 'extent' must be a power
+ * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
+ */
+#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP  (1 << 0)
+
+/**
+ * struct base_jit_alloc_info - Structure which describes a JIT allocation
+ *                              request.
+ * @gpu_alloc_addr:             The GPU virtual address to write the JIT
+ *                              allocated GPU virtual address to.
+ * @va_pages:                   The minimum number of virtual pages required.
+ * @commit_pages:               The minimum number of physical pages which
+ *                              should back the allocation.
+ * @extent:                     Granularity of physical pages to grow the
+ *                              allocation by during a fault.
+ * @id:                         Unique ID provided by the caller, this is used
+ *                              to pair allocation and free requests.
+ *                              Zero is not a valid value.
+ * @bin_id:                     The JIT allocation bin, used in conjunction with
+ *                              @max_allocations to limit the number of each
+ *                              type of JIT allocation.
+ * @max_allocations:            The maximum number of allocations allowed within
+ *                              the bin specified by @bin_id. Should be the same
+ *                              for all JIT allocations within the same bin.
+ * @flags:                      flags specifying the special requirements for
+ *                              the JIT allocation.
+ * @padding:                    Expansion space - should be initialised to zero
+ * @usage_id:                   A hint about which allocation should be reused.
+ *                              The kernel should attempt to use a previous
+ *                              allocation with the same usage_id
+ */
+struct base_jit_alloc_info {
+	u64 gpu_alloc_addr;
+	u64 va_pages;
+	u64 commit_pages;
+	u64 extent;
+	u8 id;
+	u8 bin_id;
+	u8 max_allocations;
+	u8 flags;
+	u8 padding[2];
+	u16 usage_id;
+};
+
+/**
+ * @brief Job dependency type.
+ *
+ * A flags field will be inserted into the atom structure to specify whether a dependency is a data or
+ * ordering dependency (by putting it before/after 'core_req' in the structure it should be possible to add without
+ * changing the structure size).
+ * When the flag is set for a particular dependency to signal that it is an ordering only dependency then
+ * errors will not be propagated.
+ */
+typedef u8 base_jd_dep_type;
+
+
+#define BASE_JD_DEP_TYPE_INVALID  (0)       /**< Invalid dependency */
+#define BASE_JD_DEP_TYPE_DATA     (1U << 0) /**< Data dependency */
+#define BASE_JD_DEP_TYPE_ORDER    (1U << 1) /**< Order dependency */
+
+/**
+ * @brief Job chain hardware requirements.
+ *
+ * A job chain must specify what GPU features it needs to allow the
+ * driver to schedule the job correctly.  By not specifying the
+ * correct settings can/will cause an early job termination.  Multiple
+ * values can be ORed together to specify multiple requirements.
+ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
+ * dependencies, and that doesn't execute anything on the hardware.
+ */
+typedef u32 base_jd_core_req;
+
+/* Requirements that come from the HW */
+
+/**
+ * No requirement, dependency only
+ */
+#define BASE_JD_REQ_DEP ((base_jd_core_req)0)
+
+/**
+ * Requires fragment shaders
+ */
+#define BASE_JD_REQ_FS  ((base_jd_core_req)1 << 0)
+
+/**
+ * Requires compute shaders
+ * This covers any of the following Midgard Job types:
+ * - Vertex Shader Job
+ * - Geometry Shader Job
+ * - An actual Compute Shader Job
+ *
+ * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
+ * job is specifically just the "Compute Shader" job type, and not the "Vertex
+ * Shader" nor the "Geometry Shader" job type.
+ */
+#define BASE_JD_REQ_CS  ((base_jd_core_req)1 << 1)
+#define BASE_JD_REQ_T   ((base_jd_core_req)1 << 2)   /**< Requires tiling */
+#define BASE_JD_REQ_CF  ((base_jd_core_req)1 << 3)   /**< Requires cache flushes */
+#define BASE_JD_REQ_V   ((base_jd_core_req)1 << 4)   /**< Requires value writeback */
+
+/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */
+
+/* Requires fragment job with AFBC encoding */
+#define BASE_JD_REQ_FS_AFBC  ((base_jd_core_req)1 << 13)
+
+/**
+ * SW-only requirement: coalesce completion events.
+ * If this bit is set then completion of this atom will not cause an event to
+ * be sent to userspace, whether successful or not; completion events will be
+ * deferred until an atom completes which does not have this bit set.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES.
+ */
+#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5)
+
+/**
+ * SW Only requirement: the job chain requires a coherent core group. We don't
+ * mind which coherent core group is used.
+ */
+#define BASE_JD_REQ_COHERENT_GROUP  ((base_jd_core_req)1 << 6)
+
+/**
+ * SW Only requirement: The performance counters should be enabled only when
+ * they are needed, to reduce power consumption.
+ */
+
+#define BASE_JD_REQ_PERMON               ((base_jd_core_req)1 << 7)
+
+/**
+ * SW Only requirement: External resources are referenced by this atom.
+ * When external resources are referenced no syncsets can be bundled with the atom
+ * but should instead be part of a NULL jobs inserted into the dependency tree.
+ * The first pre_dep object must be configured for the external resouces to use,
+ * the second pre_dep object can be used to create other dependencies.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and
+ * BASE_JD_REQ_SOFT_EVENT_WAIT.
+ */
+#define BASE_JD_REQ_EXTERNAL_RESOURCES   ((base_jd_core_req)1 << 8)
+
+/**
+ * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted
+ * to the hardware but will cause some action to happen within the driver
+ */
+#define BASE_JD_REQ_SOFT_JOB        ((base_jd_core_req)1 << 9)
+
+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME      (BASE_JD_REQ_SOFT_JOB | 0x1)
+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER          (BASE_JD_REQ_SOFT_JOB | 0x2)
+#define BASE_JD_REQ_SOFT_FENCE_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x3)
+
+/**
+ * SW Only requirement : Replay job.
+ *
+ * If the preceding job fails, the replay job will cause the jobs specified in
+ * the list of base_jd_replay_payload pointed to by the jc pointer to be
+ * replayed.
+ *
+ * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT
+ * times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay
+ * job is failed, as well as any following dependencies.
+ *
+ * The replayed jobs will require a number of atom IDs. If there are not enough
+ * free atom IDs then the replay job will fail.
+ *
+ * If the preceding job does not fail, then the replay job is returned as
+ * completed.
+ *
+ * The replayed jobs will never be returned to userspace. The preceding failed
+ * job will be returned to userspace as failed; the status of this job should
+ * be ignored. Completion should be determined by the status of the replay soft
+ * job.
+ *
+ * In order for the jobs to be replayed, the job headers will have to be
+ * modified. The Status field will be reset to NOT_STARTED. If the Job Type
+ * field indicates a Vertex Shader Job then it will be changed to Null Job.
+ *
+ * The replayed jobs have the following assumptions :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - Pre-dependencies are created based on job order.
+ * - Atom numbers are automatically assigned.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ */
+#define BASE_JD_REQ_SOFT_REPLAY                 (BASE_JD_REQ_SOFT_JOB | 0x4)
+/**
+ * SW only requirement: event wait/trigger job.
+ *
+ * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
+ * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
+ *   other waiting jobs. It completes immediately.
+ * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
+ *   possible for other jobs to wait upon. It completes immediately.
+ */
+#define BASE_JD_REQ_SOFT_EVENT_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x5)
+#define BASE_JD_REQ_SOFT_EVENT_SET              (BASE_JD_REQ_SOFT_JOB | 0x6)
+#define BASE_JD_REQ_SOFT_EVENT_RESET            (BASE_JD_REQ_SOFT_JOB | 0x7)
+
+#define BASE_JD_REQ_SOFT_DEBUG_COPY             (BASE_JD_REQ_SOFT_JOB | 0x8)
+
+/**
+ * SW only requirement: Just In Time allocation
+ *
+ * This job requests a single or multiple JIT allocations through a list
+ * of @base_jit_alloc_info structure which is passed via the jc element of
+ * the atom. The number of @base_jit_alloc_info structures present in the
+ * list is passed via the nr_extres element of the atom
+ *
+ * It should be noted that the id entry in @base_jit_alloc_info must not
+ * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE.
+ *
+ * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE
+ * soft job to free the JIT allocation is still made.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_ALLOC              (BASE_JD_REQ_SOFT_JOB | 0x9)
+/**
+ * SW only requirement: Just In Time free
+ *
+ * This job requests a single or multiple JIT allocations created by
+ * @BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the JIT
+ * allocations is passed via the jc element of the atom.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_FREE               (BASE_JD_REQ_SOFT_JOB | 0xa)
+
+/**
+ * SW only requirement: Map external resource
+ *
+ * This job requests external resource(s) are mapped once the dependencies
+ * of the job have been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_MAP            (BASE_JD_REQ_SOFT_JOB | 0xb)
+/**
+ * SW only requirement: Unmap external resource
+ *
+ * This job requests external resource(s) are unmapped once the dependencies
+ * of the job has been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP          (BASE_JD_REQ_SOFT_JOB | 0xc)
+
+/**
+ * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
+ *
+ * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type.
+ *
+ * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job
+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ */
+#define BASE_JD_REQ_ONLY_COMPUTE    ((base_jd_core_req)1 << 10)
+
+/**
+ * HW Requirement: Use the base_jd_atom::device_nr field to specify a
+ * particular core group
+ *
+ * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority
+ *
+ * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms.
+ *
+ * If the core availability policy is keeping the required core group turned off, then
+ * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code.
+ */
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
+
+/**
+ * SW Flag: If this bit is set then the successful completion of this atom
+ * will not cause an event to be sent to userspace
+ */
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE   ((base_jd_core_req)1 << 12)
+
+/**
+ * SW Flag: If this bit is set then completion of this atom will not cause an
+ * event to be sent to userspace, whether successful or not.
+ */
+#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14)
+
+/**
+ * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job starts which does not have this bit set or a job completes
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use if
+ * the CPU may have written to memory addressed by the job since the last job
+ * without this bit set was submitted.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
+
+/**
+ * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job completes which does not have this bit set or a job starts
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bti set. Do not use if
+ * the CPU may read from or partially overwrite memory addressed by the job
+ * before the next job without this bit set completes.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
+
+/**
+ * These requirement bits are currently unused in base_jd_core_req
+ */
+#define BASEP_JD_REQ_RESERVED \
+	(~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \
+	BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \
+	BASE_JD_REQ_EVENT_COALESCE | \
+	BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
+	BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
+	BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END))
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of the atom.
+ *
+ * This allows dependency only atoms to have flags set
+ */
+#define BASE_JD_REQ_ATOM_TYPE \
+	(BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \
+	BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE)
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of a soft job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f)
+
+/*
+ * Returns non-zero value if core requirements passed define a soft job or
+ * a dependency only job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \
+	((core_req & BASE_JD_REQ_SOFT_JOB) || \
+	(core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP)
+
+/**
+ * enum kbase_atom_coreref_state - States to model state machine processed by
+ * kbasep_js_job_check_ref_cores(), which handles retaining cores for power
+ * management.
+ *
+ * @KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: Starting state: Cores must be
+ * requested.
+ * @KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: Cores requested, but
+ * waiting for them to be powered
+ * @KBASE_ATOM_COREREF_STATE_READY: Cores are powered, atom can be submitted to
+ * HW
+ */
+enum kbase_atom_coreref_state {
+	KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
+	KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
+	KBASE_ATOM_COREREF_STATE_READY
+};
+
+/*
+ * Base Atom priority
+ *
+ * Only certain priority levels are actually implemented, as specified by the
+ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
+ * level that is not one of those defined below.
+ *
+ * Priority levels only affect scheduling between atoms of the same type within
+ * a base context, and only after the atoms have had dependencies resolved.
+ * Fragment atoms does not affect non-frament atoms with lower priorities, and
+ * the other way around. For example, a low priority atom that has had its
+ * dependencies resolved might run before a higher priority atom that has not
+ * had its dependencies resolved.
+ *
+ * The scheduling between base contexts/processes and between atoms from
+ * different base contexts/processes is unaffected by atom priority.
+ *
+ * The atoms are scheduled as follows with respect to their priorities:
+ * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies
+ *   resolved, and atom 'X' has a higher priority than atom 'Y'
+ * - If atom 'Y' is currently running on the HW, then it is interrupted to
+ *   allow atom 'X' to run soon after
+ * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing
+ *   the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
+ * - Any two atoms that have the same priority could run in any order with
+ *   respect to each other. That is, there is no ordering constraint between
+ *   atoms of the same priority.
+ */
+typedef u8 base_jd_prio;
+
+/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_MEDIUM  ((base_jd_prio)0)
+/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
+ * BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_HIGH    ((base_jd_prio)1)
+/* Low atom priority. */
+#define BASE_JD_PRIO_LOW     ((base_jd_prio)2)
+
+/* Count of the number of priority levels. This itself is not a valid
+ * base_jd_prio setting */
+#define BASE_JD_NR_PRIO_LEVELS 3
+
+enum kbase_jd_atom_state {
+	/** Atom is not used */
+	KBASE_JD_ATOM_STATE_UNUSED,
+	/** Atom is queued in JD */
+	KBASE_JD_ATOM_STATE_QUEUED,
+	/** Atom has been given to JS (is runnable/running) */
+	KBASE_JD_ATOM_STATE_IN_JS,
+	/** Atom has been completed, but not yet handed back to job dispatcher
+	 *  for dependency resolution */
+	KBASE_JD_ATOM_STATE_HW_COMPLETED,
+	/** Atom has been completed, but not yet handed back to userspace */
+	KBASE_JD_ATOM_STATE_COMPLETED
+};
+
+typedef u8 base_atom_id; /**< Type big enough to store an atom number in */
+
+struct base_dependency {
+	base_atom_id  atom_id;               /**< An atom number */
+	base_jd_dep_type dependency_type;    /**< Dependency type */
+};
+
+/* This structure has changed since UK 10.2 for which base_jd_core_req was a u16 value.
+ * In order to keep the size of the structure same, padding field has been adjusted
+ * accordingly and core_req field of a u32 type (to which UK 10.3 base_jd_core_req defines)
+ * is added at the end of the structure. Place in the structure previously occupied by u16 core_req
+ * is kept but renamed to compat_core_req and as such it can be used in ioctl call for job submission
+ * as long as UK 10.2 legacy is supported. Once when this support ends, this field can be left
+ * for possible future use. */
+typedef struct base_jd_atom_v2 {
+	u64 jc;			    /**< job-chain GPU address */
+	struct base_jd_udata udata;		    /**< user data */
+	u64 extres_list;	    /**< list of external resources */
+	u16 nr_extres;			    /**< nr of external resources or JIT allocations */
+	u16 compat_core_req;	            /**< core requirements which correspond to the legacy support for UK 10.2 */
+	struct base_dependency pre_dep[2];  /**< pre-dependencies, one need to use SETTER function to assign this field,
+	this is done in order to reduce possibility of improper assigment of a dependency field */
+	base_atom_id atom_number;	    /**< unique number to identify the atom */
+	base_jd_prio prio;                  /**< Atom priority. Refer to @ref base_jd_prio for more details */
+	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[1];
+	base_jd_core_req core_req;          /**< core requirements */
+} base_jd_atom_v2;
+
+typedef enum base_external_resource_access {
+	BASE_EXT_RES_ACCESS_SHARED,
+	BASE_EXT_RES_ACCESS_EXCLUSIVE
+} base_external_resource_access;
+
+typedef struct base_external_resource {
+	u64 ext_resource;
+} base_external_resource;
+
+
+/**
+ * The maximum number of external resources which can be mapped/unmapped
+ * in a single request.
+ */
+#define BASE_EXT_RES_COUNT_MAX 10
+
+/**
+ * struct base_external_resource_list - Structure which describes a list of
+ *                                      external resources.
+ * @count:                              The number of resources.
+ * @ext_res:                            Array of external resources which is
+ *                                      sized at allocation time.
+ */
+struct base_external_resource_list {
+	u64 count;
+	struct base_external_resource ext_res[1];
+};
+
+struct base_jd_debug_copy_buffer {
+	u64 address;
+	u64 size;
+	struct base_external_resource extres;
+};
+
+/**
+ * @brief Setter for a dependency structure
+ *
+ * @param[in] dep          The kbase jd atom dependency to be initialized.
+ * @param     id           The atom_id to be assigned.
+ * @param     dep_type     The dep_type to be assigned.
+ *
+ */
+static inline void base_jd_atom_dep_set(struct base_dependency *dep,
+		base_atom_id id, base_jd_dep_type dep_type)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	/*
+	 * make sure we don't set not allowed combinations
+	 * of atom_id/dependency_type.
+	 */
+	LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) ||
+			(id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID));
+
+	dep->atom_id = id;
+	dep->dependency_type = dep_type;
+}
+
+/**
+ * @brief Make a copy of a dependency structure
+ *
+ * @param[in,out] dep          The kbase jd atom dependency to be written.
+ * @param[in]     from         The dependency to make a copy from.
+ *
+ */
+static inline void base_jd_atom_dep_copy(struct base_dependency *dep,
+		const struct base_dependency *from)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	base_jd_atom_dep_set(dep, from->atom_id, from->dependency_type);
+}
+
+/**
+ * @brief Soft-atom fence trigger setup.
+ *
+ * Sets up an atom to be a SW-only atom signaling a fence
+ * when it reaches the run state.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to trigger when a GPU job has finished.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @ref base_jd_submit and @ref base_jd_submit
+ * has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom
+ * @param[in] fence The base fence object to trigger.
+ *
+ * @pre @p fence must reference a @ref base_fence successfully initialized by
+ *      calling @ref base_fence_init.
+ * @pre @p fence was @e not initialized by calling @ref base_fence_import, nor
+ *      is it associated with a fence-trigger job that was already submitted
+ *      by calling @ref base_jd_submit.
+ * @post @p atom can be submitted by calling @ref base_jd_submit.
+ */
+static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE);
+	LOCAL_ASSERT(fence->basep.stream_fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER;
+}
+
+/**
+ * @brief Soft-atom fence wait setup.
+ *
+ * Sets up an atom to be a SW-only atom waiting on a fence.
+ * When the fence becomes triggered the atom becomes runnable
+ * and completes immediately.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to block a GPU job until it has been triggered.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @ref base_jd_submit and
+ * @ref base_jd_submit has returned.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom
+ * @param[in] fence The base fence object to wait on
+ *
+ * @pre @p fence must reference a @ref base_fence successfully initialized by
+ *      calling @ref base_fence_import, or it must be associated with a
+ *      fence-trigger job that was already submitted by calling
+ *      @ref base_jd_submit.
+ * @post @p atom can be submitted by calling @ref base_jd_submit.
+ */
+static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT;
+}
+
+/**
+ * @brief External resource info initialization.
+ *
+ * Sets up an external resource object to reference
+ * a memory allocation and the type of access requested.
+ *
+ * @param[in] res     The resource object to initialize
+ * @param     handle  The handle to the imported memory object, must be
+ *                    obtained by calling @ref base_mem_as_import_handle().
+ * @param     access  The type of access requested
+ */
+static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access)
+{
+	u64 address;
+
+	address = handle.basep.handle;
+
+	LOCAL_ASSERT(res != NULL);
+	LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB));
+	LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+	res->ext_resource = address | (access & LOCAL_PAGE_LSB);
+}
+
+/**
+ * @brief Job chain event code bits
+ * Defines the bits used to create ::base_jd_event_code
+ */
+enum {
+	BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */
+	BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */
+	BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */
+	BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */
+	BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */
+	BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */
+	BASE_JD_SW_EVENT_RESERVED = (3u << 11),	/**< Reserved event type */
+	BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)	    /**< Mask to extract the type from an event code */
+};
+
+/**
+ * @brief Job chain event codes
+ *
+ * HW and low-level SW events are represented by event codes.
+ * The status of jobs which succeeded are also represented by
+ * an event code (see ::BASE_JD_EVENT_DONE).
+ * Events are usually reported as part of a ::base_jd_event.
+ *
+ * The event codes are encoded in the following way:
+ * @li 10:0  - subtype
+ * @li 12:11 - type
+ * @li 13    - SW success (only valid if the SW bit is set)
+ * @li 14    - SW event (HW event if not set)
+ * @li 15    - Kernel event (should never be seen in userspace)
+ *
+ * Events are split up into ranges as follows:
+ * - BASE_JD_EVENT_RANGE_\<description\>_START
+ * - BASE_JD_EVENT_RANGE_\<description\>_END
+ *
+ * \a code is in \<description\>'s range when:
+ * - <tt>BASE_JD_EVENT_RANGE_\<description\>_START <= code < BASE_JD_EVENT_RANGE_\<description\>_END </tt>
+ *
+ * Ranges can be asserted for adjacency by testing that the END of the previous
+ * is equal to the START of the next. This is useful for optimizing some tests
+ * for range.
+ *
+ * A limitation is that the last member of this enum must explicitly be handled
+ * (with an assert-unreachable statement) in switch statements that use
+ * variables of this type. Otherwise, the compiler warns that we have not
+ * handled that enum value.
+ */
+typedef enum base_jd_event_code {
+	/* HW defined exceptions */
+
+	/** Start of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
+
+	/* non-fatal exceptions */
+	BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */
+	BASE_JD_EVENT_DONE = 0x01,
+	BASE_JD_EVENT_STOPPED = 0x03,	  /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */
+	BASE_JD_EVENT_TERMINATED = 0x04,  /**< This is actually a fault status code - the job was hard stopped */
+	BASE_JD_EVENT_ACTIVE = 0x08,	  /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */
+
+	/** End of HW Non-fault status codes
+	 *
+	 * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+	 * because the job was hard-stopped
+	 */
+	BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
+
+	/** Start of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
+
+	/* job exceptions */
+	BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
+	BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
+	BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
+	BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
+	BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
+	BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
+	BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
+	BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
+	BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
+	BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
+	BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
+	BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
+	BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
+	BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
+	BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
+	BASE_JD_EVENT_STATE_FAULT = 0x5A,
+	BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
+	BASE_JD_EVENT_UNKNOWN = 0x7F,
+
+	/* GPU exceptions */
+	BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
+	BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
+
+	/* MMU exceptions */
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
+	BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
+	BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
+	BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
+	BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
+
+	/* SW defined exceptions */
+	BASE_JD_EVENT_MEM_GROWTH_FAILED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_TIMED_OUT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
+	BASE_JD_EVENT_JOB_CANCELLED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
+	BASE_JD_EVENT_JOB_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
+	BASE_JD_EVENT_PM_EVENT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
+	BASE_JD_EVENT_FORCE_REPLAY	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005,
+
+	BASE_JD_EVENT_BAG_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
+
+	/** End of HW fault and SW Error status codes */
+	BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000,
+
+	BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
+	BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000,
+	BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
+
+	/** End of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of Kernel-only status codes. Such codes are never returned to user-space */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000,
+	BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
+
+	/** End of Kernel-only status codes. */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
+} base_jd_event_code;
+
+/**
+ * @brief Event reporting structure
+ *
+ * This structure is used by the kernel driver to report information
+ * about GPU events. The can either be HW-specific events or low-level
+ * SW events, such as job-chain completion.
+ *
+ * The event code contains an event type field which can be extracted
+ * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK.
+ *
+ * Based on the event type base_jd_event::data holds:
+ * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed
+ * job-chain
+ * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has
+ * been completed (ie all contained job-chains have been completed).
+ * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used
+ */
+typedef struct base_jd_event_v2 {
+	base_jd_event_code event_code;  /**< event code */
+	base_atom_id atom_number;       /**< the atom number that has completed */
+	struct base_jd_udata udata;     /**< user data */
+} base_jd_event_v2;
+
+/**
+ * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs.
+ *
+ * This structure is stored into the memory pointed to by the @c jc field
+ * of @ref base_jd_atom.
+ *
+ * It must not occupy the same CPU cache line(s) as any neighboring data.
+ * This is to avoid cases where access to pages containing the structure
+ * is shared between cached and un-cached memory regions, which would
+ * cause memory corruption.
+ */
+
+typedef struct base_dump_cpu_gpu_counters {
+	u64 system_time;
+	u64 cycle_counter;
+	u64 sec;
+	u32 usec;
+	u8 padding[36];
+} base_dump_cpu_gpu_counters;
+
+/** @} end group base_user_api_job_dispatch */
+
+#define GPU_MAX_JOB_SLOTS 16
+
+/**
+ * @page page_base_user_api_gpuprops User-side Base GPU Property Query API
+ *
+ * The User-side Base GPU Property Query API encapsulates two
+ * sub-modules:
+ *
+ * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties"
+ * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties"
+ *
+ * There is a related third module outside of Base, which is owned by the MIDG
+ * module:
+ * - @ref gpu_props_static "Midgard Compile-time GPU Properties"
+ *
+ * Base only deals with properties that vary between different Midgard
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the Midgard Architecture, refer to the
+ * MIDG module. However, we will discuss their relevance here <b>just to
+ * provide background information.</b>
+ *
+ * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules
+ *
+ * The compile-time properties (Platform Config, Midgard Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the Midgard Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistent guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * -# Can the code be written so that it doesn't need to know the
+ * implementation limits at all?
+ * -# If you need the limits, get the information from the Dynamic Property
+ * lookup. This should be done once as you fetch the context, and then cached
+ * as part of the context data structure, so it's cheap to access.
+ * -# If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or Midgard Compile-time
+ * property). Examples of where this might be sensible follow:
+ *  - Part of a critical inner-loop
+ *  - Frequent re-use throughout the driver, causing significant extra load
+ * instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, neither can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * -# the commonly used properties in @ref base_gpu_props, which have been
+ * unpacked from GPU register bitfields.
+ * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props
+ * (also a member of @ref base_gpu_props). All of these are presented in
+ * the packed form, as presented by the GPU  registers themselves.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the Midgard Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the Midgard
+ * Architecture, but is <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ *
+ * The GPU properties are obtained by a call to
+ * base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to base_get_gpu_props() to a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * @section sec_base_user_api_gpuprops_kernel Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel-side will fill the provided the entire processed @ref base_gpu_props
+ * structure, because this information is required in both
+ * user and kernel side; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify a 'Only Coherent Group'
+ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT mask, depending on whether the
+ * system is L2 Coherent. The number of shader cores is done by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask.
+ *
+ * The memory requirements for this algorithm can be determined either by a u64
+ * population count on the L2_PRESENT mask (a LUT helper already is
+ * required for the above), or simple assumption that there can be no more than
+ * 16 coherent groups, since core groups are typically 4 cores.
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties
+ * @{
+ */
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+struct mali_base_gpu_core_props {
+	/**
+	 * Product specific value.
+	 */
+	u32 product_id;
+
+	/**
+	 * Status of the GPU release.
+	 * No defined values, but starts at 0 and increases by one for each
+	 * release status (alpha, beta, EAC, etc.).
+	 * 4 bit values (0-15).
+	 */
+	u16 version_status;
+
+	/**
+	 * Minor release number of the GPU. "P" part of an "RnPn" release number.
+     * 8 bit values (0-255).
+	 */
+	u16 minor_revision;
+
+	/**
+	 * Major release number of the GPU. "R" part of an "RnPn" release number.
+     * 4 bit values (0-15).
+	 */
+	u16 major_revision;
+
+	u16 padding;
+
+	/* The maximum GPU frequency. Reported to applications by
+	 * clGetDeviceInfo()
+	 */
+	u32 gpu_freq_khz_max;
+
+	/**
+	 * Size of the shader program counter, in bits.
+	 */
+	u32 log2_program_counter_size;
+
+	/**
+	 * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a
+	 * bitpattern where a set bit indicates that the format is supported.
+	 *
+	 * Before using a texture format, it is recommended that the corresponding
+	 * bit be checked.
+	 */
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+	/**
+	 * Theoretical maximum memory available to the GPU. It is unlikely that a
+	 * client will be able to allocate all of this memory for their own
+	 * purposes, but this at least provides an upper bound on the memory
+	 * available to the GPU.
+	 *
+	 * This is required for OpenCL's clGetDeviceInfo() call when
+	 * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+	 * client will not be expecting to allocate anywhere near this value.
+	 */
+	u64 gpu_available_memory_size;
+
+	/**
+	 * The number of execution engines.
+	 */
+	u8 num_exec_engines;
+};
+
+/**
+ *
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level apis.
+ */
+struct mali_base_gpu_l2_cache_props {
+	u8 log2_line_size;
+	u8 log2_cache_size;
+	u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+	u8 padding[5];
+};
+
+struct mali_base_gpu_tiler_props {
+	u32 bin_size_bytes;	/* Max is 4*2^15 */
+	u32 max_active_levels;	/* Max is 2^15 */
+};
+
+/**
+ * GPU threading system details.
+ */
+struct mali_base_gpu_thread_props {
+	u32 max_threads;            /* Max. number of threads per core */
+	u32 max_workgroup_size;     /* Max. number of threads per workgroup */
+	u32 max_barrier_size;       /* Max. number of threads that can synchronize on a simple barrier */
+	u16 max_registers;          /* Total size [1..65535] of the register file available per core. */
+	u8  max_task_queue;         /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
+	u8  max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */
+	u8  impl_tech;              /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
+	u8  padding[3];
+	u32 tls_alloc;              /* Number of threads per core that TLS must
+				     * be allocated for
+				     */
+};
+
+/**
+ * @brief descriptor for a coherent group
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ * the application, depending on the Kernel Power policy.
+ *
+ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage.
+ */
+struct mali_base_gpu_coherent_group {
+	u64 core_mask;	       /**< Core restriction mask required for the group */
+	u16 num_cores;	       /**< Number of cores in the group */
+	u16 padding[3];
+};
+
+/**
+ * @brief Coherency group information
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+	u32 num_groups;
+
+	/**
+	 * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches.
+	 *
+	 * The GPU Counter dumping writes 2048 bytes per core group, regardless of
+	 * whether the core groups are coherent or not. Hence this member is needed
+	 * to calculate how much memory is required for dumping.
+	 *
+	 * @note Do not use it to work out how many valid elements are in the
+	 * group[] member. Use num_groups instead.
+	 */
+	u32 num_core_groups;
+
+	/**
+	 * Coherency features of the memory, accessed by @ref gpu_mem_features
+	 * methods
+	 */
+	u32 coherency;
+
+	u32 padding;
+
+	/**
+	 * Descriptors of coherent groups
+	 */
+	struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
+
+/**
+ * A complete description of the GPU's Hardware Configuration Discovery
+ * registers.
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct gpu_raw_gpu_props {
+	u64 shader_present;
+	u64 tiler_present;
+	u64 l2_present;
+	u64 stack_present;
+
+	u32 l2_features;
+	u32 core_features;
+	u32 mem_features;
+	u32 mmu_features;
+
+	u32 as_present;
+
+	u32 js_present;
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 tiler_features;
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+	u32 gpu_id;
+
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+
+	/*
+	 * Note: This is the _selected_ coherency mode rather than the
+	 * available modes as exposed in the coherency_features register.
+	 */
+	u32 coherency_mode;
+
+	u32 thread_tls_alloc;
+};
+
+/**
+ * Return structure for base_get_gpu_props().
+ *
+ * NOTE: the raw_props member in this data structure contains the register
+ * values from which the value of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ *
+ */
+typedef struct base_gpu_props {
+	struct mali_base_gpu_core_props core_props;
+	struct mali_base_gpu_l2_cache_props l2_props;
+	u64 unused_1; /* keep for backwards compatibility */
+	struct mali_base_gpu_tiler_props tiler_props;
+	struct mali_base_gpu_thread_props thread_props;
+
+	/** This member is large, likely to be 128 bytes */
+	struct gpu_raw_gpu_props raw_props;
+
+	/** This must be last member of the structure */
+	struct mali_base_gpu_coherent_group_info coherency_info;
+} base_gpu_props;
+
+/** @} end group base_user_api_gpuprops_dyn */
+
+/** @} end group base_user_api_gpuprops */
+
+/**
+ * @addtogroup base_user_api_core User-side Base core APIs
+ * @{
+ */
+
+/**
+ * Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
+ * not collide with them.
+ */
+typedef u32 base_context_create_flags;
+
+/** No flags set */
+#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
+
+/** Base context is embedded in a cctx object (flag used for CINSTR
+ * software counter macros)
+ */
+#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
+
+/** Base context is a 'System Monitor' context for Hardware counters.
+ *
+ * One important side effect of this is that job submission is disabled.
+ */
+#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
+	((base_context_create_flags)1 << 1)
+
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be
+ * passed to base_context_init()
+ */
+#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
+	(BASE_CONTEXT_CCTX_EMBEDDED | \
+	 BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be
+ * passed to the kernel
+ */
+#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
+	BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED
+
+/*
+ * Private flags used on the base context
+ *
+ * These start at bit 31, and run down to zero.
+ *
+ * They share the same space as @ref base_context_create_flags, and so must
+ * not collide with them.
+ */
+/** Private flag tracking whether job descriptor dumping is disabled */
+#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((u32)(1 << 31))
+
+/** @} end group base_user_api_core */
+
+/** @} end group base_user_api */
+
+/**
+ * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties
+ * @{
+ *
+ * C Pre-processor macros are exposed here to do with Platform
+ * Config.
+ *
+ * These include:
+ * - GPU Properties that are constant on a particular Midgard Family
+ * Implementation e.g. Maximum samples per pixel on Mali-T600.
+ * - General platform config for the GPU, such as the GPU major and minor
+ * revison.
+ */
+
+/** @} end group base_plat_config_gpuprops */
+
+/**
+ * @addtogroup base_api Base APIs
+ * @{
+ */
+
+/**
+ * @brief The payload for a replay job. This must be in GPU memory.
+ */
+typedef struct base_jd_replay_payload {
+	/**
+	 * Pointer to the first entry in the base_jd_replay_jc list.  These
+	 * will be replayed in @b reverse order (so that extra ones can be added
+	 * to the head in future soft jobs without affecting this soft job)
+	 */
+	u64 tiler_jc_list;
+
+	/**
+	 * Pointer to the fragment job chain.
+	 */
+	u64 fragment_jc;
+
+	/**
+	 * Pointer to the tiler heap free FBD field to be modified.
+	 */
+	u64 tiler_heap_free;
+
+	/**
+	 * Hierarchy mask for the replayed fragment jobs. May be zero.
+	 */
+	u16 fragment_hierarchy_mask;
+
+	/**
+	 * Hierarchy mask for the replayed tiler jobs. May be zero.
+	 */
+	u16 tiler_hierarchy_mask;
+
+	/**
+	 * Default weight to be used for hierarchy levels not in the original
+	 * mask.
+	 */
+	u32 hierarchy_default_weight;
+
+	/**
+	 * Core requirements for the tiler job chain
+	 */
+	base_jd_core_req tiler_core_req;
+
+	/**
+	 * Core requirements for the fragment job chain
+	 */
+	base_jd_core_req fragment_core_req;
+} base_jd_replay_payload;
+
+/**
+ * @brief An entry in the linked list of job chains to be replayed. This must
+ *        be in GPU memory.
+ */
+typedef struct base_jd_replay_jc {
+	/**
+	 * Pointer to next entry in the list. A setting of NULL indicates the
+	 * end of the list.
+	 */
+	u64 next;
+
+	/**
+	 * Pointer to the job chain.
+	 */
+	u64 jc;
+
+} base_jd_replay_jc;
+
+/* Maximum number of jobs allowed in a fragment chain in the payload of a
+ * replay job */
+#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256
+
+/** @} end group base_api */
+
+typedef struct base_profiling_controls {
+	u32 profiling_controls[FBDUMP_CONTROL_MAX];
+} base_profiling_controls;
+
+/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
+ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */
+#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
+
+/* Indicate that job dumping is enabled. This could affect certain timers
+ * to account for the performance impact. */
+#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1)
+
+#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
+		BASE_TLSTREAM_JOB_DUMPING_ENABLED)
+
+
+#endif				/* _BASE_KERNEL_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/drivers/gpu/arm/midgard/mali_base_mem_priv.h
new file mode 100644
index 0000000000000..52c8a4f7d2d80
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_mem_priv.h
@@ -0,0 +1,57 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _BASE_MEM_PRIV_H_
+#define _BASE_MEM_PRIV_H_
+
+#define BASE_SYNCSET_OP_MSYNC	(1U << 0)
+#define BASE_SYNCSET_OP_CSYNC	(1U << 1)
+
+/*
+ * This structure describe a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ *	- type = ::BASE_SYNCSET_OP_MSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes
+ *	- offset is ignored.
+ * @li a sync from Memory to CPU:
+ *	- type = ::BASE_SYNCSET_OP_CSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes.
+ *	- offset is ignored.
+ */
+struct basep_syncset {
+	base_mem_handle mem_handle;
+	u64 user_addr;
+	u64 size;
+	u8 type;
+	u8 padding[7];
+};
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h b/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
new file mode 100644
index 0000000000000..5e8add8838f24
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#ifndef _BASE_VENDOR_SPEC_FUNC_H_
+#define _BASE_VENDOR_SPEC_FUNC_H_
+
+int kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
+
+#endif	/*_BASE_VENDOR_SPEC_FUNC_H_*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h
new file mode 100644
index 0000000000000..dc0d5f1173e00
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase.h
@@ -0,0 +1,694 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _KBASE_H_
+#define _KBASE_H_
+
+#include <mali_malisw.h>
+
+#include <mali_kbase_debug.h>
+
+#include <linux/atomic.h>
+#include <linux/highmem.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0))
+#include <linux/sched/mm.h>
+#endif
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include "mali_base_kernel.h"
+#include <mali_kbase_linux.h>
+
+/*
+ * Include mali_kbase_defs.h first as this provides types needed by other local
+ * header files.
+ */
+#include "mali_kbase_defs.h"
+
+#include "mali_kbase_context.h"
+#include "mali_kbase_strings.h"
+#include "mali_kbase_mem_lowlevel.h"
+#include "mali_kbase_js.h"
+#include "mali_kbase_utility.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_gpu_memory_debugfs.h"
+#include "mali_kbase_mem_profile_debugfs.h"
+#include "mali_kbase_debug_job_fault.h"
+#include "mali_kbase_jd_debugfs.h"
+#include "mali_kbase_gpuprops.h"
+#include "mali_kbase_jm.h"
+#include "mali_kbase_vinstr.h"
+
+#include "ipa/mali_kbase_ipa.h"
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+#include <trace/events/gpu.h>
+#endif
+
+
+#ifndef u64_to_user_ptr
+/* Introduced in Linux v4.6 */
+#define u64_to_user_ptr(x) ((void __user *)(uintptr_t)x)
+#endif
+
+/*
+ * Kernel-side Base (KBase) APIs
+ */
+
+struct kbase_device *kbase_device_alloc(void);
+/*
+* note: configuration attributes member of kbdev needs to have
+* been setup before calling kbase_device_init
+*/
+
+/*
+* API to acquire device list semaphore and return pointer
+* to the device list head
+*/
+const struct list_head *kbase_dev_list_get(void);
+/* API to release the device list semaphore */
+void kbase_dev_list_put(const struct list_head *dev_list);
+
+int kbase_device_init(struct kbase_device * const kbdev);
+void kbase_device_term(struct kbase_device *kbdev);
+void kbase_device_free(struct kbase_device *kbdev);
+int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
+
+/* Needed for gator integration and for reporting vsync information */
+struct kbase_device *kbase_find_device(int minor);
+void kbase_release_device(struct kbase_device *kbdev);
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
+
+
+/**
+ * kbase_get_unmapped_area() - get an address range which is currently
+ *                             unmapped.
+ * @filp: File operations associated with kbase device.
+ * @addr: CPU mapped address (set to 0 since MAP_FIXED mapping is not allowed
+ *        as Mali GPU driver decides about the mapping).
+ * @len: Length of the address range.
+ * @pgoff: Page offset within the GPU address space of the kbase context.
+ * @flags: Flags for the allocation.
+ *
+ * Finds the unmapped address range which satisfies requirements specific to
+ * GPU and those provided by the call parameters.
+ *
+ * 1) Requirement for allocations greater than 2MB:
+ * - alignment offset is set to 2MB and the alignment mask to 2MB decremented
+ * by 1.
+ *
+ * 2) Requirements imposed for the shader memory alignment:
+ * - alignment is decided by the number of GPU pc bits which can be read from
+ * GPU properties of the device associated with this kbase context; alignment
+ * offset is set to this value in bytes and the alignment mask to the offset
+ * decremented by 1.
+ * - allocations must not to be at 4GB boundaries. Such cases are indicated
+ * by the flag KBASE_REG_GPU_NX not being set (check the flags of the kbase
+ * region). 4GB boundaries can be checked against @ref BASE_MEM_MASK_4GB.
+ *
+ * 3) Requirements imposed for tiler memory alignment, cases indicated by
+ * the flag @ref KBASE_REG_TILER_ALIGN_TOP (check the flags of the kbase
+ * region):
+ * - alignment offset is set to the difference between the kbase region
+ * extent (converted from the original value in pages to bytes) and the kbase
+ * region initial_commit (also converted from the original value in pages to
+ * bytes); alignment mask is set to the kbase region extent in bytes and
+ * decremented by 1.
+ *
+ * Return: if successful, address of the unmapped area aligned as required;
+ *         error code (negative) in case of failure;
+ */
+unsigned long kbase_get_unmapped_area(struct file *filp,
+		const unsigned long addr, const unsigned long len,
+		const unsigned long pgoff, const unsigned long flags);
+
+int kbase_jd_init(struct kbase_context *kctx);
+void kbase_jd_exit(struct kbase_context *kctx);
+
+/**
+ * kbase_jd_submit - Submit atoms to the job dispatcher
+ *
+ * @kctx: The kbase context to submit to
+ * @user_addr: The address in user space of the struct base_jd_atom_v2 array
+ * @nr_atoms: The number of atoms in the array
+ * @stride: sizeof(struct base_jd_atom_v2)
+ * @uk6_atom: true if the atoms are legacy atoms (struct base_jd_atom_v2_uk6)
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_jd_submit(struct kbase_context *kctx,
+		void __user *user_addr, u32 nr_atoms, u32 stride,
+		bool uk6_atom);
+
+/**
+ * kbase_jd_done_worker - Handle a job completion
+ * @data: a &struct work_struct
+ *
+ * This function requeues the job from the runpool (if it was soft-stopped or
+ * removed from NEXT registers).
+ *
+ * Removes it from the system if it finished/failed/was cancelled.
+ *
+ * Resolves dependencies to add dependent jobs to the context, potentially
+ * starting them if necessary (which may add more references to the context)
+ *
+ * Releases the reference to the context from the no-longer-running job.
+ *
+ * Handles retrying submission outside of IRQ context if it failed from within
+ * IRQ context.
+ */
+void kbase_jd_done_worker(struct work_struct *data);
+
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
+		kbasep_js_atom_done_code done_code);
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_zap_context(struct kbase_context *kctx);
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx);
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
+bool jd_submit_atom(struct kbase_context *kctx,
+			 const struct base_jd_atom_v2 *user_atom,
+			 struct kbase_jd_atom *katom);
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done);
+
+/**
+ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms
+ *                                               and soft stop them
+ * @kctx: Pointer to context to check.
+ * @katom: Pointer to priority atom.
+ *
+ * Atoms from @kctx on the same job slot as @katom, which have lower priority
+ * than @katom will be soft stopped and put back in the queue, so that atoms
+ * with higher priority can run.
+ *
+ * The hwaccess_lock must be held when calling this function.
+ */
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom, u32 sw_flags);
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom);
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event);
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent);
+int kbase_event_pending(struct kbase_context *ctx);
+int kbase_event_init(struct kbase_context *kctx);
+void kbase_event_close(struct kbase_context *kctx);
+void kbase_event_cleanup(struct kbase_context *kctx);
+void kbase_event_wakeup(struct kbase_context *kctx);
+
+/**
+ * kbasep_jit_alloc_validate() - Validate the JIT allocation info.
+ *
+ * @kctx:	Pointer to the kbase context within which the JIT
+ *		allocation is to be validated.
+ * @info:	Pointer to struct @base_jit_alloc_info
+ *			which is to be validated.
+ * @return: 0 if jit allocation is valid; negative error code otherwise
+ */
+int kbasep_jit_alloc_validate(struct kbase_context *kctx,
+					struct base_jit_alloc_info *info);
+/**
+ * kbase_mem_copy_from_extres_page() - Copy pages from external resources.
+ *
+ * @kctx:		kbase context within which the copying is to take place.
+ * @extres_pages:	Pointer to the pages which correspond to the external
+ *			resources from which the copying will take place.
+ * @pages:		Pointer to the pages to which the content is to be
+ *			copied from the provided external resources.
+ * @nr_pages:		Number of pages to copy.
+ * @target_page_nr:	Number of target pages which will be used for copying.
+ * @offset:		Offset into the target pages from which the copying
+ *			is to be performed. 
+ * @to_copy:		Size of the chunk to be copied, in bytes. 
+ */
+void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
+		void *extres_page, struct page **pages, unsigned int nr_pages,
+		unsigned int *target_page_nr, size_t offset, size_t *to_copy);
+/**
+ * kbase_mem_copy_from_extres() - Copy from external resources.
+ *
+ * @kctx:	kbase context within which the copying is to take place.
+ * @buf_data:	Pointer to the information about external resources:
+ *		pages pertaining to the external resource, number of
+ *		pages to copy.
+ */
+int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+		struct kbase_debug_copy_buffer *buf_data);
+int kbase_process_soft_job(struct kbase_jd_atom *katom);
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
+void kbase_finish_soft_job(struct kbase_jd_atom *katom);
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom);
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom);
+#endif
+int kbase_soft_event_update(struct kbase_context *kctx,
+			    u64 event,
+			    unsigned char new_status);
+
+bool kbase_replay_process(struct kbase_jd_atom *katom);
+
+void kbasep_soft_job_timeout_worker(struct timer_list *timer);
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
+
+void kbasep_as_do_poke(struct work_struct *work);
+
+/** Returns the name associated with a Mali exception code
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN.
+ *
+ * @param[in] kbdev     The kbase device that the GPU fault occurred from.
+ * @param[in] exception_code  exception code
+ * @return name associated with the exception code
+ */
+const char *kbase_exception_name(struct kbase_device *kbdev,
+		u32 exception_code);
+
+/**
+ * Check whether a system suspend is in progress, or has already been suspended
+ *
+ * The caller should ensure that either kbdev->pm.active_count_lock is held, or
+ * a dmb was executed recently (to ensure the value is most
+ * up-to-date). However, without a lock the value could change afterwards.
+ *
+ * @return false if a suspend is not in progress
+ * @return !=false otherwise
+ */
+static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
+{
+	return kbdev->pm.suspending;
+}
+
+/**
+ * kbase_pm_is_active - Determine whether the GPU is active
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This takes into account the following
+ *
+ * - whether there is an active context reference
+ *
+ * - whether any of the shader cores or the tiler are needed
+ *
+ * It should generally be preferred against checking just
+ * kbdev->pm.active_count on its own, because some code paths drop their
+ * reference on this whilst still having the shader cores/tiler in use.
+ *
+ * Return: true if the GPU is active, false otherwise
+ */
+static inline bool kbase_pm_is_active(struct kbase_device *kbdev)
+{
+	return (kbdev->pm.active_count > 0 || kbdev->shader_needed_cnt ||
+			kbdev->tiler_needed_cnt);
+}
+
+/**
+ * Return the atom's ID, as was originally supplied by userspace in
+ * base_jd_atom_v2::atom_number
+ */
+static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int result;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->kctx == kctx);
+
+	result = katom - &kctx->jctx.atoms[0];
+	KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT);
+	return result;
+}
+
+/**
+ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID
+ * @kctx: Context pointer
+ * @id:   ID of atom to retrieve
+ *
+ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID
+ */
+static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
+		struct kbase_context *kctx, int id)
+{
+	return &kctx->jctx.atoms[id];
+}
+
+/**
+ * Initialize the disjoint state
+ *
+ * The disjoint event count and state are both set to zero.
+ *
+ * Disjoint functions usage:
+ *
+ * The disjoint event count should be incremented whenever a disjoint event occurs.
+ *
+ * There are several cases which are regarded as disjoint behavior. Rather than just increment
+ * the counter during disjoint events we also increment the counter when jobs may be affected
+ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
+ *
+ * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
+ * (as part of the replay workaround). Increasing the disjoint state also increases the count of
+ * disjoint events.
+ *
+ * The disjoint state is then used to increase the count of disjoint events during job submission
+ * and job completion. Any atom submitted or completed while the disjoint state is greater than
+ * zero is regarded as a disjoint event.
+ *
+ * The disjoint event counter is also incremented immediately whenever a job is soft stopped
+ * and during context creation.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_init(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events
+ * called when a disjoint event has happened
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events only if the GPU is in a disjoint state
+ *
+ * This should be called when something happens which could be disjoint if the GPU
+ * is in a disjoint state. The state refcount keeps track of this.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev);
+
+/**
+ * Returns the count of disjoint events
+ *
+ * @param kbdev The kbase device
+ * @return the count of disjoint events
+ */
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
+
+/**
+ * Increment the refcount state indicating that the GPU is in a disjoint state.
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ * eventually after the disjoint state has completed @ref kbase_disjoint_state_down
+ * should be called
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_up(struct kbase_device *kbdev);
+
+/**
+ * Decrement the refcount state
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ *
+ * Called after @ref kbase_disjoint_state_up once the disjoint state is over
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_down(struct kbase_device *kbdev);
+
+/**
+ * If a job is soft stopped and the number of contexts is >= this value
+ * it is reported as a disjoint event
+ */
+#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
+
+#if !defined(UINT64_MAX)
+	#define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+#endif
+
+#if KBASE_TRACE_ENABLE
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
+
+#ifndef CONFIG_MALI_SYSTEM_TRACE
+/** Add trace values about a job-slot
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0)
+
+/** Add trace values about a job-slot, with info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
+
+/** Add trace values about a ctx refcount
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
+/** Add trace values about a ctx refcount, and info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val)
+
+/** Add trace values (no slot or refcount)
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)     \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			0, 0, 0, info_val)
+
+/** Clear the trace */
+#define KBASE_TRACE_CLEAR(kbdev) \
+	kbasep_trace_clear(kbdev)
+
+/** Dump the slot trace */
+#define KBASE_TRACE_DUMP(kbdev) \
+	kbasep_trace_dump(kbdev)
+
+/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val);
+/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */
+void kbasep_trace_clear(struct kbase_device *kbdev);
+#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+/* Dispatch kbase trace events as system trace events */
+#include <mali_linux_kbase_trace.h>
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	trace_mali_##code(jobslot, 0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	trace_mali_##code(jobslot, info_val)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	trace_mali_##code(refcount, 0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	trace_mali_##code(refcount, info_val)
+
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
+	trace_mali_##code(gpu_addr, info_val)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+#else
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(jobslot);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(refcount);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(gpu_addr);\
+		CSTD_UNUSED(info_val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(code);\
+		CSTD_UNUSED(subcode);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#endif /* KBASE_TRACE_ENABLE */
+/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */
+void kbasep_trace_dump(struct kbase_device *kbdev);
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/* kbase_io_history_init - initialize data struct for register access history
+ *
+ * @kbdev The register history to initialize
+ * @n The number of register accesses that the buffer could hold
+ *
+ * @return 0 if successfully initialized, failure otherwise
+ */
+int kbase_io_history_init(struct kbase_io_history *h, u16 n);
+
+/* kbase_io_history_term - uninit all resources for the register access history
+ *
+ * @h The register history to terminate
+ */
+void kbase_io_history_term(struct kbase_io_history *h);
+
+/* kbase_io_history_dump - print the register history to the kernel ring buffer
+ *
+ * @kbdev Pointer to kbase_device containing the register history to dump
+ */
+void kbase_io_history_dump(struct kbase_device *kbdev);
+
+/**
+ * kbase_io_history_resize - resize the register access history buffer.
+ *
+ * @h: Pointer to a valid register history to resize
+ * @new_size: Number of accesses the buffer could hold
+ *
+ * A successful resize will clear all recent register accesses.
+ * If resizing fails for any reason (e.g., could not allocate memory, invalid
+ * buffer size) then the original buffer will be kept intact.
+ *
+ * @return 0 if the buffer was resized, failure otherwise
+ */
+int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbase_io_history_init(...) ((int)0)
+
+#define kbase_io_history_term CSTD_NOP
+
+#define kbase_io_history_dump CSTD_NOP
+
+#define kbase_io_history_resize CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+
+#endif
+
+
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
new file mode 100644
index 0000000000000..8d71926ea575b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
@@ -0,0 +1,209 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015,2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_kbase_10969_workaround.h>
+
+/* Mask of X and Y coordinates for the coordinates words in the descriptors*/
+#define X_COORDINATE_MASK 0x00000FFF
+#define Y_COORDINATE_MASK 0x0FFF0000
+/* Max number of words needed from the fragment shader job descriptor */
+#define JOB_HEADER_SIZE_IN_WORDS 10
+#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
+
+/* Word 0: Status Word */
+#define JOB_DESC_STATUS_WORD 0
+/* Word 1: Restart Index */
+#define JOB_DESC_RESTART_INDEX_WORD 1
+/* Word 2: Fault address low word */
+#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
+/* Word 8: Minimum Tile Coordinates */
+#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
+/* Word 9: Maximum Tile Coordinates */
+#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
+{
+	struct device *dev = katom->kctx->kbdev->dev;
+	u32   clamped = 0;
+	struct kbase_va_region *region;
+	struct tagged_addr *page_array;
+	u64 page_index;
+	u32 offset = katom->jc & (~PAGE_MASK);
+	u32 *page_1 = NULL;
+	u32 *page_2 = NULL;
+	u32   job_header[JOB_HEADER_SIZE_IN_WORDS];
+	void *dst = job_header;
+	u32 minX, minY, maxX, maxY;
+	u32 restartX, restartY;
+	struct page *p;
+	u32 copy_size;
+
+	dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
+	if (!(katom->core_req & BASE_JD_REQ_FS))
+		return 0;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+			katom->jc);
+	if (!region || (region->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	page_array = kbase_get_cpu_phy_pages(region);
+	if (!page_array)
+		goto out_unlock;
+
+	page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
+
+	p = as_page(page_array[page_index]);
+
+	/* we need the first 10 words of the fragment shader job descriptor.
+	 * We need to check that the offset + 10 words is less that the page
+	 * size otherwise we need to load the next page.
+	 * page_size_overflow will be equal to 0 in case the whole descriptor
+	 * is within the page > 0 otherwise.
+	 */
+	copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
+
+	page_1 = kmap_atomic(p);
+
+	/* page_1 is a u32 pointer, offset is expressed in bytes */
+	page_1 += offset>>2;
+
+	kbase_sync_single_for_cpu(katom->kctx->kbdev,
+			kbase_dma_addr(p) + offset,
+			copy_size, DMA_BIDIRECTIONAL);
+
+	memcpy(dst, page_1, copy_size);
+
+	/* The data needed overflows page the dimension,
+	 * need to map the subsequent page */
+	if (copy_size < JOB_HEADER_SIZE) {
+		p = as_page(page_array[page_index + 1]);
+		page_2 = kmap_atomic(p);
+
+		kbase_sync_single_for_cpu(katom->kctx->kbdev,
+				kbase_dma_addr(p),
+				JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
+
+		memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
+	}
+
+	/* We managed to correctly map one or two pages (in case of overflow) */
+	/* Get Bounding Box data and restart index from fault address low word */
+	minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
+	maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+	restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
+	restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
+
+	dev_warn(dev, "Before Clamping:\n"
+			"Jobstatus: %08x\n"
+			"restartIdx: %08x\n"
+			"Fault_addr_low: %08x\n"
+			"minCoordsX: %08x minCoordsY: %08x\n"
+			"maxCoordsX: %08x maxCoordsY: %08x\n",
+			job_header[JOB_DESC_STATUS_WORD],
+			job_header[JOB_DESC_RESTART_INDEX_WORD],
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+			minX, minY,
+			maxX, maxY);
+
+	/* Set the restart index to the one which generated the fault*/
+	job_header[JOB_DESC_RESTART_INDEX_WORD] =
+			job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
+
+	if (restartX < minX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to minimum. %08x clamped to %08x\n",
+			restartX, minX);
+		clamped =  1;
+	}
+	if (restartY < minY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to minimum. %08x clamped to %08x\n",
+			restartY, minY);
+		clamped =  1;
+	}
+	if (restartX > maxX) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY;
+		dev_warn(dev,
+			"Clamping restart X index to maximum. %08x clamped to %08x\n",
+			restartX, maxX);
+		clamped =  1;
+	}
+	if (restartY > maxY) {
+		job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX;
+		dev_warn(dev,
+			"Clamping restart Y index to maximum. %08x clamped to %08x\n",
+			restartY, maxY);
+		clamped =  1;
+	}
+
+	if (clamped) {
+		/* Reset the fault address low word
+		 * and set the job status to STOPPED */
+		job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
+		job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
+		dev_warn(dev, "After Clamping:\n"
+				"Jobstatus: %08x\n"
+				"restartIdx: %08x\n"
+				"Fault_addr_low: %08x\n"
+				"minCoordsX: %08x minCoordsY: %08x\n"
+				"maxCoordsX: %08x maxCoordsY: %08x\n",
+				job_header[JOB_DESC_STATUS_WORD],
+				job_header[JOB_DESC_RESTART_INDEX_WORD],
+				job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+				minX, minY,
+				maxX, maxY);
+
+		/* Flush CPU cache to update memory for future GPU reads*/
+		memcpy(page_1, dst, copy_size);
+		p = as_page(page_array[page_index]);
+
+		kbase_sync_single_for_device(katom->kctx->kbdev,
+				kbase_dma_addr(p) + offset,
+				copy_size, DMA_TO_DEVICE);
+
+		if (copy_size < JOB_HEADER_SIZE) {
+			memcpy(page_2, dst + copy_size,
+					JOB_HEADER_SIZE - copy_size);
+			p = as_page(page_array[page_index + 1]);
+
+			kbase_sync_single_for_device(katom->kctx->kbdev,
+					kbase_dma_addr(p),
+					JOB_HEADER_SIZE - copy_size,
+					DMA_TO_DEVICE);
+		}
+	}
+	if (copy_size < JOB_HEADER_SIZE)
+		kunmap_atomic(page_2);
+
+	kunmap_atomic(page_1);
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+	return clamped;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
new file mode 100644
index 0000000000000..379a05a1a1280
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_10969_WORKAROUND_
+#define _KBASE_10969_WORKAROUND_
+
+/**
+ * kbasep_10969_workaround_clamp_coordinates - Apply the WA to clamp the restart indices
+ * @katom: atom representing the fragment job for which the WA has to be applied
+ *
+ * This workaround is used to solve an HW issue with single iterator GPUs.
+ * If a fragment job is soft-stopped on the edge of its bounding box, it can happen
+ * that the restart index is out of bounds and the rerun causes a tile range
+ * fault. If this happens we try to clamp the restart index to a correct value.
+ */
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
+
+#endif /* _KBASE_10969_WORKAROUND_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
new file mode 100644
index 0000000000000..2e99a4d8ab1ce
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
@@ -0,0 +1,111 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_as_fault_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+
+static int kbase_as_fault_read(struct seq_file *sfile, void *data)
+{
+	uintptr_t as_no = (uintptr_t) sfile->private;
+
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+	struct kbase_device *kbdev = NULL;
+
+	kbdev_list = kbase_dev_list_get();
+
+	list_for_each(entry, kbdev_list) {
+		kbdev = list_entry(entry, struct kbase_device, entry);
+
+		if (kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) {
+
+			/* don't show this one again until another fault occors */
+			kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no);
+
+			/* output the last page fault addr */
+			seq_printf(sfile, "%llu\n",
+				   (u64) kbdev->as[as_no].fault_addr);
+		}
+
+	}
+
+	kbase_dev_list_put(kbdev_list);
+
+	return 0;
+}
+
+static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbase_as_fault_read, in->i_private);
+}
+
+static const struct file_operations as_fault_fops = {
+	.open = kbase_as_fault_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+
+/*
+ *  Initialize debugfs entry for each address space
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	uint i;
+	char as_name[64];
+	struct dentry *debugfs_directory;
+
+	kbdev->debugfs_as_read_bitmap = 0ULL;
+
+	KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces);
+	KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64));
+
+	debugfs_directory = debugfs_create_dir("address_spaces",
+					       kbdev->mali_debugfs_directory);
+
+	if (debugfs_directory) {
+		for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+			snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
+			debugfs_create_file(as_name, S_IRUGO,
+					    debugfs_directory,
+					    (void *)(uintptr_t)i,
+					    &as_fault_fops);
+		}
+	} else {
+		dev_warn(kbdev->dev,
+			 "unable to create address_spaces debugfs directory");
+	}
+
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
+	return;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
new file mode 100644
index 0000000000000..496d8b17f2404
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_AS_FAULT_DEBUG_FS_H
+#define _KBASE_AS_FAULT_DEBUG_FS_H
+
+/**
+ * kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults
+ *
+ * @kbdev: Pointer to kbase_device
+ */
+void kbase_as_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_as_fault_debugfs_new() - make the last fault available on debugfs
+ *
+ * @kbdev: Pointer to kbase_device
+ * @as_no: The address space the fault occurred on
+ */
+static inline void
+kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no)
+{
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_MALI_DEBUG
+	kbdev->debugfs_as_read_bitmap |= (1ULL << as_no);
+#endif /* CONFIG_DEBUG_FS */
+#endif /* CONFIG_MALI_DEBUG */
+	return;
+}
+
+#endif  /*_KBASE_AS_FAULT_DEBUG_FS_H*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
new file mode 100644
index 0000000000000..27a03cf021380
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#include "mali_kbase_cache_policy.h"
+
+/*
+ * The output flags should be a combination of the following values:
+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled
+ * KBASE_REG_GPU_CACHED: GPU cache should be enabled
+ *
+ * NOTE: Some components within the GPU might only be able to access memory
+ * that is KBASE_REG_GPU_CACHED. Refer to the specific GPU implementation for
+ * more details.
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
+{
+	u32 cache_flags = 0;
+
+	CSTD_UNUSED(nr_pages);
+
+	if (!(flags & BASE_MEM_UNCACHED_GPU))
+		cache_flags |= KBASE_REG_GPU_CACHED;
+
+	if (flags & BASE_MEM_CACHED_CPU)
+		cache_flags |= KBASE_REG_CPU_CACHED;
+
+	return cache_flags;
+}
+
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+	dma_sync_single_for_device(kbdev->dev, handle, size, dir);
+}
+
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+	dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
new file mode 100644
index 0000000000000..8a1e5291bf5f1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#ifndef _KBASE_CACHE_POLICY_H_
+#define _KBASE_CACHE_POLICY_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+ * kbase_cache_enabled - Choose the cache policy for a specific region
+ * @flags:    flags describing attributes of the region
+ * @nr_pages: total number of pages (backed or not) for the region
+ *
+ * Tells whether the CPU and GPU caches should be enabled or not for a specific
+ * region.
+ * This function can be modified to customize the cache policy depending on the
+ * flags and size of the region.
+ *
+ * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED
+ *         depending on the cache policy
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages);
+
+#endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.c b/drivers/gpu/arm/midgard/mali_kbase_config.c
new file mode 100644
index 0000000000000..ce7070d1d634c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_config.c
@@ -0,0 +1,48 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015,2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+int kbasep_platform_device_init(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_init_func)
+		return platform_funcs_p->platform_init_func(kbdev);
+
+	return 0;
+}
+
+void kbasep_platform_device_term(struct kbase_device *kbdev)
+{
+	struct kbase_platform_funcs_conf *platform_funcs_p;
+
+	platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+	if (platform_funcs_p && platform_funcs_p->platform_term_func)
+		platform_funcs_p->platform_term_func(kbdev);
+}
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.h b/drivers/gpu/arm/midgard/mali_kbase_config.h
new file mode 100644
index 0000000000000..1637fcbc4d297
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_config.h
@@ -0,0 +1,299 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_config.h
+ * Configuration API and Attributes for KBase
+ */
+
+#ifndef _KBASE_CONFIG_H_
+#define _KBASE_CONFIG_H_
+
+#include <linux/mm.h>
+#include <mali_malisw.h>
+#include <mali_kbase_backend_config.h>
+#include <linux/rbtree.h>
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_config Configuration API and Attributes
+ * @{
+ */
+
+/* Forward declaration of struct kbase_device */
+struct kbase_device;
+
+/**
+ * kbase_platform_funcs_conf - Specifies platform init/term function pointers
+ *
+ * Specifies the functions pointers for platform specific initialization and
+ * termination. By default no functions are required. No additional platform
+ * specific control is necessary.
+ */
+struct kbase_platform_funcs_conf {
+	/**
+	 * platform_init_func - platform specific init function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Returns 0 on success, negative error code otherwise.
+	 *
+	 * Function pointer for platform specific initialization or NULL if no
+	 * initialization function is required. At the point this the GPU is
+	 * not active and its power and clocks are in unknown (platform specific
+	 * state) as kbase doesn't yet have control of power and clocks.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly initialized) in here.
+	 */
+	int (*platform_init_func)(struct kbase_device *kbdev);
+	/**
+	 * platform_term_func - platform specific termination function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Function pointer for platform specific termination or NULL if no
+	 * termination function is required. At the point this the GPU will be
+	 * idle but still powered and clocked.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly terminated) in here.
+	 */
+	void (*platform_term_func)(struct kbase_device *kbdev);
+};
+
+/*
+ * @brief Specifies the callbacks for power management
+ *
+ * By default no callbacks will be made and the GPU must not be powered off.
+ */
+struct kbase_pm_callback_conf {
+	/** Callback for when the GPU is idle and the power to it can be switched off.
+	 *
+	 * The system integrator can decide whether to either do nothing, just switch off
+	 * the clocks to the GPU, or to completely power down the GPU.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the GPU is about to become active and power must be supplied.
+	 *
+	 * This function must not return until the GPU is powered and clocked sufficiently for register access to
+	 * succeed.  The return value specifies whether the GPU was powered down since the call to power_off_callback.
+	 * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 *
+	 * The return value of the first call to this function is ignored.
+	 *
+	 * @return 1 if the GPU state may have been lost, 0 otherwise.
+	 */
+	int (*power_on_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is requesting a suspend and GPU power
+	 * must be switched off.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a preceding call to power_off_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_off_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_suspend_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is resuming from a suspend and GPU
+	 * power must be switched on.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a following call to power_on_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_on_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callbacks responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_resume_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management initialization.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * will become active from calls made to the OS from within this function.
+	 * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else int error code.
+	 */
+	 int (*power_runtime_init_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management termination.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * should no longer be called by the OS on completion of this function.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	void (*power_runtime_term_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-off power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_suspend callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else OS error code.
+	 */
+	void (*power_runtime_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-on power management callback
+	 *
+	 * For linux this callback will be called by the kernel runtime_resume callback.
+	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	int (*power_runtime_on_callback)(struct kbase_device *kbdev);
+
+	/*
+	 * Optional callback for checking if GPU can be suspended when idle
+	 *
+	 * This callback will be called by the runtime power management core
+	 * when the reference count goes to 0 to provide notification that the
+	 * GPU now seems idle.
+	 *
+	 * If this callback finds that the GPU can't be powered off, or handles
+	 * suspend by powering off directly or queueing up a power off, a
+	 * non-zero value must be returned to prevent the runtime PM core from
+	 * also triggering a suspend.
+	 *
+	 * Returning 0 will cause the runtime PM core to conduct a regular
+	 * autosuspend.
+	 *
+	 * This callback is optional and if not provided regular autosuspend
+	 * will be triggered.
+	 *
+	 * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use
+	 * this feature.
+	 *
+	 * Return 0 if GPU can be suspended, positive value if it can not be
+	 * suspeneded by runtime PM, else OS error code
+	 */
+	int (*power_runtime_idle_callback)(struct kbase_device *kbdev);
+};
+
+#ifdef CONFIG_OF
+struct kbase_platform_config {
+};
+#else
+
+/*
+ * @brief Specifies start and end of I/O memory region.
+ */
+struct kbase_io_memory_region {
+	u64 start;
+	u64 end;
+};
+
+/*
+ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations.
+ */
+struct kbase_io_resources {
+	u32                      job_irq_number;
+	u32                      mmu_irq_number;
+	u32                      gpu_irq_number;
+	struct kbase_io_memory_region io_memory_region;
+};
+
+struct kbase_platform_config {
+	const struct kbase_io_resources *io_resources;
+};
+
+#endif /* CONFIG_OF */
+
+/**
+ * @brief Gets the pointer to platform config.
+ *
+ * @return Pointer to the platform config
+ */
+struct kbase_platform_config *kbase_get_platform_config(void);
+
+/**
+ * kbasep_platform_device_init: - Platform specific call to initialize hardware
+ * @kbdev: kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes.  The routine can initialize any hardware and context state that
+ * is required for the GPU block to function.
+ *
+ * Return: 0 if no errors have been found in the config.
+ *         Negative error code otherwise.
+ */
+int kbasep_platform_device_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_platform_device_term - Platform specific call to terminate hardware
+ * @kbdev: Kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes. The routine can destroy any platform specific context state and
+ * shut down any hardware functionality that are outside of the Power Management
+ * callbacks.
+ *
+ */
+void kbasep_platform_device_term(struct kbase_device *kbdev);
+
+#ifndef CONFIG_OF
+/**
+ * kbase_platform_register - Register a platform device for the GPU
+ *
+ * This can be used to register a platform device on systems where device tree
+ * is not enabled and the platform initialisation code in the kernel doesn't
+ * create the GPU device. Where possible device tree should be used instead.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_register(void);
+
+/**
+ * kbase_platform_unregister - Unregister a fake platform device
+ *
+ * Unregister the platform device created with kbase_platform_register()
+ */
+void kbase_platform_unregister(void);
+#endif
+
+	  /** @} *//* end group kbase_config */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_CONFIG_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
new file mode 100644
index 0000000000000..9d918a804b2fb
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
@@ -0,0 +1,278 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file mali_kbase_config_defaults.h
+ *
+ * Default values for configuration settings
+ *
+ */
+
+#ifndef _KBASE_CONFIG_DEFAULTS_H_
+#define _KBASE_CONFIG_DEFAULTS_H_
+
+/* Include mandatory definitions per platform */
+#include <mali_kbase_config_platform.h>
+
+/**
+* Boolean indicating whether the driver is configured to be secure at
+* a potential loss of performance.
+*
+* This currently affects only r0p0-15dev0 HW and earlier.
+*
+* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and
+* performance:
+*
+* - When this is set to true, the driver remains fully secure,
+* but potentially loses performance compared with setting this to
+* false.
+* - When set to false, the driver is open to certain security
+* attacks.
+*
+* From r0p0-00rel0 and onwards, there is no security loss by setting
+* this to false, and no performance loss by setting it to
+* true.
+*/
+#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false
+
+enum {
+	/**
+	 * Use unrestricted Address ID width on the AXI bus.
+	 */
+	KBASE_AID_32 = 0x0,
+
+	/**
+	 * Restrict GPU to a half of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_16 = 0x3,
+
+	/**
+	 * Restrict GPU to a quarter of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_8  = 0x2,
+
+	/**
+	 * Restrict GPU to an eighth of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_4  = 0x1
+};
+
+enum {
+	/**
+	 * Use unrestricted Address ID width on the AXI bus.
+	 * Restricting ID width will reduce performance & bus load due to GPU.
+	 */
+	KBASE_3BIT_AID_32 = 0x0,
+
+	/* Restrict GPU to 7/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_28 = 0x1,
+
+	/* Restrict GPU to 3/4 of maximum Address ID count. */
+	KBASE_3BIT_AID_24 = 0x2,
+
+	/* Restrict GPU to 5/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_20 = 0x3,
+
+	/* Restrict GPU to 1/2 of maximum Address ID count.  */
+	KBASE_3BIT_AID_16 = 0x4,
+
+	/* Restrict GPU to 3/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_12 = 0x5,
+
+	/* Restrict GPU to 1/4 of maximum Address ID count. */
+	KBASE_3BIT_AID_8  = 0x6,
+
+	/* Restrict GPU to 1/8 of maximum Address ID count. */
+	KBASE_3BIT_AID_4  = 0x7
+};
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_ARID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_AWID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Default value: KBASE_3BIT_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_3BIT_ARID_LIMIT KBASE_3BIT_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Default value: KBASE_3BIT_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_3BIT_AWID_LIMIT KBASE_3BIT_AID_32
+
+/**
+ * Default period for DVFS sampling
+ */
+#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
+
+/**
+ * Power Management poweroff tick granuality. This is in nanoseconds to
+ * allow HR timer support.
+ *
+ * On each scheduling tick, the power manager core may decide to:
+ * -# Power off one or more shader cores
+ * -# Power off the entire GPU
+ */
+#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
+
+/**
+ * Power Manager number of ticks before shader cores are powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
+
+/**
+ * Power Manager number of ticks before GPU is powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
+
+/**
+ * Default scheduling tick granuality
+ */
+#define DEFAULT_JS_SCHEDULING_PERIOD_NS    (100000000u) /* 100ms */
+
+/**
+ * Default minimum number of scheduling ticks before jobs are soft-stopped.
+ *
+ * This defines the time-slice for a job (which may be different from that of a
+ * context)
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS       (1) /* 100ms-200ms */
+
+/**
+ * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS_CL    (1) /* 100ms-200ms */
+
+/**
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS    (50) /* 5s */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408  (300) /* 30s */
+
+/**
+ * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_CL    (50) /* 5s */
+
+/**
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ * during dumping
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING   (15000) /* 1500s */
+
+/**
+ * Default timeout for some software jobs, after which the software event wait
+ * jobs will be cancelled.
+ */
+#define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */
+
+/**
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job
+ */
+#define DEFAULT_JS_RESET_TICKS_SS           (55) /* 5.5s */
+#define DEFAULT_JS_RESET_TICKS_SS_8408     (450) /* 45s */
+
+/**
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" CL job.
+ */
+#define DEFAULT_JS_RESET_TICKS_CL        (55) /* 5.5s */
+
+/**
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job during dumping.
+ */
+#define DEFAULT_JS_RESET_TICKS_DUMPING   (15020) /* 1502s */
+
+/**
+ * Default number of milliseconds given for other jobs on the GPU to be
+ * soft-stopped when the GPU needs to be reset.
+ */
+#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+
+/**
+ * Default timeslice that a context is scheduled in for, in nanoseconds.
+ *
+ * When a context has used up this amount of time across its jobs, it is
+ * scheduled out to let another run.
+ *
+ * @note the resolution is nanoseconds (ns) here, because that's the format
+ * often used by the OS.
+ */
+#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
+
+/**
+ * Perform GPU power down using only platform specific code, skipping DDK power
+ * management.
+ *
+ * If this is non-zero then kbase will avoid powering down shader cores, the
+ * tiler, and the L2 cache, instead just powering down the entire GPU through
+ * platform specific code. This may be required for certain platform
+ * integrations.
+ *
+ * Note that as this prevents kbase from powering down shader cores, this limits
+ * the available power policies to coarse_demand and always_on.
+ */
+#define PLATFORM_POWER_DOWN_ONLY (0)
+
+/**
+ * Maximum frequency (in kHz) that the GPU can be clocked. For some platforms
+ * this isn't available, so we simply define a dummy value here. If devfreq
+ * is enabled the value will be read from there, otherwise this should be
+ * overridden by defining GPU_FREQ_KHZ_MAX in the platform file.
+ */
+#define DEFAULT_GPU_FREQ_KHZ_MAX (5000)
+
+#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c
new file mode 100644
index 0000000000000..970be895e671c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_context.c
@@ -0,0 +1,327 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base kernel context APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_dma_fence.h>
+#include <mali_kbase_ctx_sched.h>
+
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat)
+{
+	struct kbase_context *kctx;
+	int err;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* zero-inited as lot of code assume it's zero'ed out on create */
+	kctx = vzalloc(sizeof(*kctx));
+
+	if (!kctx)
+		goto out;
+
+	/* creating a context is considered a disjoint event */
+	kbase_disjoint_event(kbdev);
+
+	kctx->kbdev = kbdev;
+	kctx->as_nr = KBASEP_AS_NR_INVALID;
+	atomic_set(&kctx->refcount, 0);
+	if (is_compat)
+		kbase_ctx_flag_set(kctx, KCTX_COMPAT);
+#if defined(CONFIG_64BIT)
+	else
+		kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA);
+#endif /* !defined(CONFIG_64BIT) */
+
+	atomic_set(&kctx->setup_complete, 0);
+	atomic_set(&kctx->setup_in_progress, 0);
+	spin_lock_init(&kctx->mm_update_lock);
+	kctx->process_mm = NULL;
+	atomic_set(&kctx->nonmapped_pages, 0);
+	kctx->slots_pullable = 0;
+	kctx->tgid = current->tgid;
+	kctx->pid = current->pid;
+
+	err = kbase_mem_pool_init(&kctx->mem_pool,
+				  kbdev->mem_pool_max_size_default,
+				  KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
+				  kctx->kbdev,
+				  &kbdev->mem_pool);
+	if (err)
+		goto free_kctx;
+
+	err = kbase_mem_pool_init(&kctx->lp_mem_pool,
+				  (kbdev->mem_pool_max_size_default >> 9),
+				  KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER,
+				  kctx->kbdev,
+				  &kbdev->lp_mem_pool);
+	if (err)
+		goto free_mem_pool;
+
+	err = kbase_mem_evictable_init(kctx);
+	if (err)
+		goto free_both_pools;
+
+	atomic_set(&kctx->used_pages, 0);
+
+	err = kbase_jd_init(kctx);
+	if (err)
+		goto deinit_evictable;
+
+	err = kbasep_js_kctx_init(kctx);
+	if (err)
+		goto free_jd;	/* safe to call kbasep_js_kctx_term  in this case */
+
+	err = kbase_event_init(kctx);
+	if (err)
+		goto free_jd;
+
+
+	atomic_set(&kctx->drain_pending, 0);
+
+	mutex_init(&kctx->reg_lock);
+
+	spin_lock_init(&kctx->mem_partials_lock);
+	INIT_LIST_HEAD(&kctx->mem_partials);
+
+	INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
+	spin_lock_init(&kctx->waiting_soft_jobs_lock);
+	err = kbase_dma_fence_init(kctx);
+	if (err)
+		goto free_event;
+
+	err = kbase_mmu_init(kbdev, &kctx->mmu, kctx);
+	if (err)
+		goto term_dma_fence;
+
+	p = kbase_mem_alloc_page(&kctx->mem_pool);
+	if (!p)
+		goto no_sink_page;
+	kctx->aliasing_sink_page = as_tagged(page_to_phys(p));
+
+	init_waitqueue_head(&kctx->event_queue);
+
+	kctx->cookies = KBASE_COOKIE_MASK;
+
+
+	/* Make sure page 0 is not used... */
+	err = kbase_region_tracker_init(kctx);
+	if (err)
+		goto no_region_tracker;
+
+	err = kbase_sticky_resource_init(kctx);
+	if (err)
+		goto no_sticky;
+
+	err = kbase_jit_init(kctx);
+	if (err)
+		goto no_jit;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_set(&kctx->jctx.work_id, 0);
+#endif
+
+	kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
+
+	mutex_init(&kctx->vinstr_cli_lock);
+
+	kbase_timer_setup(&kctx->soft_job_timeout,
+			  kbasep_soft_job_timeout_worker);
+
+	return kctx;
+
+no_jit:
+	kbase_gpu_vm_lock(kctx);
+	kbase_sticky_resource_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+no_sticky:
+	kbase_region_tracker_term(kctx);
+no_region_tracker:
+	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+no_sink_page:
+	kbase_mmu_term(kbdev, &kctx->mmu);
+term_dma_fence:
+	kbase_dma_fence_term(kctx);
+free_event:
+	kbase_event_cleanup(kctx);
+free_jd:
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+	kbase_jd_exit(kctx);
+deinit_evictable:
+	kbase_mem_evictable_deinit(kctx);
+free_both_pools:
+	kbase_mem_pool_term(&kctx->lp_mem_pool);
+free_mem_pool:
+	kbase_mem_pool_term(&kctx->mem_pool);
+free_kctx:
+	vfree(kctx);
+out:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_create_context);
+
+static void kbase_reg_pending_dtor(struct kbase_device *kbdev,
+		struct kbase_va_region *reg)
+{
+	dev_dbg(kbdev->dev, "Freeing pending unmapped region\n");
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+}
+
+void kbase_destroy_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	int pages;
+	unsigned long pending_regions_to_clean;
+	unsigned long flags;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+	KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
+
+	/* Ensure the core is powered up for the destroy process */
+	/* A suspend won't happen here, because we're in a syscall from a userspace
+	 * thread. */
+	kbase_pm_context_active(kbdev);
+
+	kbase_mem_pool_mark_dying(&kctx->mem_pool);
+
+	kbase_jd_zap_context(kctx);
+
+#ifdef CONFIG_DEBUG_FS
+	/* Removing the rest of the debugfs entries here as we want to keep the
+	 * atom debugfs interface alive until all atoms have completed. This
+	 * is useful for debugging hung contexts. */
+	debugfs_remove_recursive(kctx->kctx_dentry);
+#endif
+
+	kbase_event_cleanup(kctx);
+
+
+	/*
+	 * JIT must be terminated before the code below as it must be called
+	 * without the region lock being held.
+	 * The code above ensures no new JIT allocations can be made by
+	 * by the time we get to this point of context tear down.
+	 */
+	kbase_jit_term(kctx);
+
+	kbase_gpu_vm_lock(kctx);
+
+	kbase_sticky_resource_term(kctx);
+
+	/* drop the aliasing sink page now that it can't be mapped anymore */
+	p = as_page(kctx->aliasing_sink_page);
+	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+
+	/* free pending region setups */
+	pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
+	while (pending_regions_to_clean) {
+		unsigned int cookie = __ffs(pending_regions_to_clean);
+
+		BUG_ON(!kctx->pending_regions[cookie]);
+
+		kbase_reg_pending_dtor(kbdev, kctx->pending_regions[cookie]);
+
+		kctx->pending_regions[cookie] = NULL;
+		pending_regions_to_clean &= ~(1UL << cookie);
+	}
+
+	kbase_region_tracker_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+
+
+	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+	kbasep_js_kctx_term(kctx);
+
+	kbase_jd_exit(kctx);
+
+	kbase_dma_fence_term(kctx);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
+	kbase_ctx_sched_remove_ctx(kctx);
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_mmu_term(kbdev, &kctx->mmu);
+
+	pages = atomic_read(&kctx->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_evictable_deinit(kctx);
+	kbase_mem_pool_term(&kctx->mem_pool);
+	kbase_mem_pool_term(&kctx->lp_mem_pool);
+	WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
+
+	vfree(kctx);
+
+	kbase_pm_context_idle(kbdev);
+}
+KBASE_EXPORT_SYMBOL(kbase_destroy_context);
+
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
+{
+	int err = 0;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long irq_flags;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Validate flags */
+	if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+
+	/* Translate the flags */
+	if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
+		kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
+
+	/* Latch the initial attributes into the Job Scheduler */
+	kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
+
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ out:
+	return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.h b/drivers/gpu/arm/midgard/mali_kbase_context.h
new file mode 100644
index 0000000000000..30b0f649806bb
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_context.h
@@ -0,0 +1,123 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_CONTEXT_H_
+#define _KBASE_CONTEXT_H_
+
+#include <linux/atomic.h>
+
+/**
+ * kbase_create_context() - Create a kernel base context.
+ * @kbdev: Kbase device
+ * @is_compat: Force creation of a 32-bit context
+ *
+ * Allocate and init a kernel base context.
+ *
+ * Return: new kbase context
+ */
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat);
+
+/**
+ * kbase_destroy_context - Destroy a kernel base context.
+ * @kctx: Context to destroy
+ *
+ * Calls kbase_destroy_os_context() to free OS specific structures.
+ * Will release all outstanding regions.
+ */
+void kbase_destroy_context(struct kbase_context *kctx);
+
+/**
+ * kbase_context_set_create_flags - Set creation flags on a context
+ * @kctx: Kbase context
+ * @flags: Flags to set, which shall be one of the flags of
+ *         BASE_CONTEXT_CREATE_KERNEL_FLAGS.
+ *
+ * Return: 0 on success, -EINVAL otherwise when an invalid flag is specified.
+ */
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
+
+/**
+ * kbase_ctx_flag - Check if @flag is set on @kctx
+ * @kctx: Pointer to kbase context to check
+ * @flag: Flag to check
+ *
+ * Return: true if @flag is set on @kctx, false if not.
+ */
+static inline bool kbase_ctx_flag(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	return atomic_read(&kctx->flags) & flag;
+}
+
+/**
+ * kbase_ctx_flag_clear - Clear @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to clear
+ *
+ * Clear the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
+					enum kbase_context_flags flag)
+{
+#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE
+	/*
+	 * Earlier kernel versions doesn't have atomic_andnot() or
+	 * atomic_and(). atomic_clear_mask() was only available on some
+	 * architectures and removed on arm in v3.13 on arm and arm64.
+	 *
+	 * Use a compare-exchange loop to clear the flag on pre 4.3 kernels,
+	 * when atomic_andnot() becomes available.
+	 */
+	int old, new;
+
+	do {
+		old = atomic_read(&kctx->flags);
+		new = old & ~flag;
+
+	} while (atomic_cmpxchg(&kctx->flags, old, new) != old);
+#else
+	atomic_andnot(flag, &kctx->flags);
+#endif
+}
+
+/**
+ * kbase_ctx_flag_set - Set @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to clear
+ *
+ * Set the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_set(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	atomic_or(flag, &kctx->flags);
+}
+#endif /* _KBASE_CONTEXT_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
new file mode 100644
index 0000000000000..e4c60103fbde1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
@@ -0,0 +1,4211 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_mem_linux.h>
+#ifdef CONFIG_MALI_DEVFREQ
+#include <linux/devfreq.h>
+#include <backend/gpu/mali_kbase_devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <ipa/mali_kbase_ipa_debugfs.h>
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* CONFIG_MALI_DEVFREQ */
+#ifdef CONFIG_MALI_NO_MALI
+#include "mali_kbase_model_linux.h"
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif /* CONFIG_MALI_NO_MALI */
+#include "mali_kbase_mem_profile_debugfs_buf_size.h"
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_mem_pool_debugfs.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_regs_dump_debugfs.h"
+#endif /* !MALI_CUSTOMER_RELEASE */
+#include "mali_kbase_regs_history_debugfs.h"
+#include <mali_kbase_hwaccess_backend.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_ctx_sched.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_ioctl.h"
+
+#ifdef CONFIG_MALI_JOB_DUMP
+#include "mali_kbase_gwt.h"
+#endif
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/semaphore.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/compat.h>	/* is_compat_task/in_compat_syscall */
+#include <linux/mman.h>
+#include <linux/version.h>
+#include <mali_kbase_hw.h>
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#include <mali_kbase_sync.h>
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/log2.h>
+
+#include <mali_kbase_config.h>
+
+
+#if (KERNEL_VERSION(3, 13, 0) <= LINUX_VERSION_CODE)
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#endif
+
+#include <mali_kbase_tlstream.h>
+
+#include <mali_kbase_as_fault_debugfs.h>
+
+/* GPU IRQ Tags */
+#define	JOB_IRQ_TAG	0
+#define MMU_IRQ_TAG	1
+#define GPU_IRQ_TAG	2
+
+static int kbase_dev_nr;
+
+static DEFINE_MUTEX(kbase_dev_list_lock);
+static LIST_HEAD(kbase_dev_list);
+
+#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
+
+static int kbase_api_handshake(struct kbase_context *kctx,
+			       struct kbase_ioctl_version_check *version)
+{
+	switch (version->major) {
+	case BASE_UK_VERSION_MAJOR:
+		/* set minor to be the lowest common */
+		version->minor = min_t(int, BASE_UK_VERSION_MINOR,
+				       (int)version->minor);
+		break;
+	default:
+		/* We return our actual version regardless if it
+		 * matches the version returned by userspace -
+		 * userspace can bail if it can't handle this
+		 * version
+		 */
+		version->major = BASE_UK_VERSION_MAJOR;
+		version->minor = BASE_UK_VERSION_MINOR;
+		break;
+	}
+
+	/* save the proposed version number for later use */
+	kctx->api_version = KBASE_API_VERSION(version->major, version->minor);
+
+	return 0;
+}
+
+/**
+ * enum mali_error - Mali error codes shared with userspace
+ *
+ * This is subset of those common Mali errors that can be returned to userspace.
+ * Values of matching user and kernel space enumerators MUST be the same.
+ * MALI_ERROR_NONE is guaranteed to be 0.
+ *
+ * @MALI_ERROR_NONE: Success
+ * @MALI_ERROR_OUT_OF_GPU_MEMORY: Not used in the kernel driver
+ * @MALI_ERROR_OUT_OF_MEMORY: Memory allocation failure
+ * @MALI_ERROR_FUNCTION_FAILED: Generic error code
+ */
+enum mali_error {
+	MALI_ERROR_NONE = 0,
+	MALI_ERROR_OUT_OF_GPU_MEMORY,
+	MALI_ERROR_OUT_OF_MEMORY,
+	MALI_ERROR_FUNCTION_FAILED,
+};
+
+enum {
+	inited_mem = (1u << 0),
+	inited_js = (1u << 1),
+	/* Bit number 2 was earlier assigned to the runtime-pm initialization
+	 * stage (which has been merged with the backend_early stage).
+	 */
+#ifdef CONFIG_MALI_DEVFREQ
+	inited_devfreq = (1u << 3),
+#endif /* CONFIG_MALI_DEVFREQ */
+	inited_tlstream = (1u << 4),
+	inited_backend_early = (1u << 5),
+	inited_backend_late = (1u << 6),
+	inited_device = (1u << 7),
+	inited_vinstr = (1u << 8),
+	inited_job_fault = (1u << 10),
+	inited_sysfs_group = (1u << 11),
+	inited_misc_register = (1u << 12),
+	inited_get_device = (1u << 13),
+	inited_dev_list = (1u << 14),
+	inited_debugfs = (1u << 15),
+	inited_gpu_device = (1u << 16),
+	inited_registers_map = (1u << 17),
+	inited_io_history = (1u << 18),
+	inited_power_control = (1u << 19),
+	inited_buslogger = (1u << 20),
+	inited_protected = (1u << 21),
+	inited_ctx_sched = (1u << 22)
+};
+
+static struct kbase_device *to_kbase_device(struct device *dev)
+{
+	return dev_get_drvdata(dev);
+}
+
+static int assign_irqs(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int i;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* 3 IRQ resources */
+	for (i = 0; i < 3; i++) {
+		struct resource *irq_res;
+		int irqtag;
+
+		irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!irq_res) {
+			dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
+			return -ENOENT;
+		}
+
+#ifdef CONFIG_OF
+		if (!strncmp(irq_res->name, "job", 4)) {
+			irqtag = JOB_IRQ_TAG;
+		} else if (!strncmp(irq_res->name, "mmu", 4)) {
+			irqtag = MMU_IRQ_TAG;
+		} else if (!strncmp(irq_res->name, "gpu", 4)) {
+			irqtag = GPU_IRQ_TAG;
+		} else {
+			dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
+				irq_res->name);
+			return -EINVAL;
+		}
+#else
+		irqtag = i;
+#endif /* CONFIG_OF */
+		kbdev->irqs[irqtag].irq = irq_res->start;
+		kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK;
+	}
+
+	return 0;
+}
+
+/*
+ * API to acquire device list mutex and
+ * return pointer to the device list head
+ */
+const struct list_head *kbase_dev_list_get(void)
+{
+	mutex_lock(&kbase_dev_list_lock);
+	return &kbase_dev_list;
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_get);
+
+/* API to release the device list mutex */
+void kbase_dev_list_put(const struct list_head *dev_list)
+{
+	mutex_unlock(&kbase_dev_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_put);
+
+/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */
+struct kbase_device *kbase_find_device(int minor)
+{
+	struct kbase_device *kbdev = NULL;
+	struct list_head *entry;
+	const struct list_head *dev_list = kbase_dev_list_get();
+
+	list_for_each(entry, dev_list) {
+		struct kbase_device *tmp;
+
+		tmp = list_entry(entry, struct kbase_device, entry);
+		if (tmp->mdev.minor == minor || minor == -1) {
+			kbdev = tmp;
+			get_device(kbdev->dev);
+			break;
+		}
+	}
+	kbase_dev_list_put(dev_list);
+
+	return kbdev;
+}
+EXPORT_SYMBOL(kbase_find_device);
+
+void kbase_release_device(struct kbase_device *kbdev)
+{
+	put_device(kbdev->dev);
+}
+EXPORT_SYMBOL(kbase_release_device);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && \
+		!(LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 28) && \
+		LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0))
+/*
+ * Older versions, before v4.6, of the kernel doesn't have
+ * kstrtobool_from_user(), except longterm 4.4.y which had it added in 4.4.28
+ */
+static int kstrtobool_from_user(const char __user *s, size_t count, bool *res)
+{
+	char buf[4];
+
+	count = min(count, sizeof(buf) - 1);
+
+	if (copy_from_user(buf, s, count))
+		return -EFAULT;
+	buf[count] = '\0';
+
+	return strtobool(buf, res);
+}
+#endif
+
+static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	int err;
+	bool value;
+
+	err = kstrtobool_from_user(ubuf, size, &value);
+	if (err)
+		return err;
+
+	if (value)
+		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
+	else
+		kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE);
+
+	return size;
+}
+
+static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	char buf[32];
+	int count;
+	bool value;
+
+	value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE);
+
+	count = scnprintf(buf, sizeof(buf), "%s\n", value ? "Y" : "N");
+
+	return simple_read_from_buffer(ubuf, size, off, buf, count);
+}
+
+static const struct file_operations kbase_infinite_cache_fops = {
+	.open = simple_open,
+	.write = write_ctx_infinite_cache,
+	.read = read_ctx_infinite_cache,
+};
+
+static ssize_t write_ctx_force_same_va(struct file *f, const char __user *ubuf,
+		size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	int err;
+	bool value;
+
+	err = kstrtobool_from_user(ubuf, size, &value);
+	if (err)
+		return err;
+
+	if (value) {
+#if defined(CONFIG_64BIT)
+		/* 32-bit clients cannot force SAME_VA */
+		if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+			return -EINVAL;
+		kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA);
+#else /* defined(CONFIG_64BIT) */
+		/* 32-bit clients cannot force SAME_VA */
+		return -EINVAL;
+#endif /* defined(CONFIG_64BIT) */
+	} else {
+		kbase_ctx_flag_clear(kctx, KCTX_FORCE_SAME_VA);
+	}
+
+	return size;
+}
+
+static ssize_t read_ctx_force_same_va(struct file *f, char __user *ubuf,
+		size_t size, loff_t *off)
+{
+	struct kbase_context *kctx = f->private_data;
+	char buf[32];
+	int count;
+	bool value;
+
+	value = kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA);
+
+	count = scnprintf(buf, sizeof(buf), "%s\n", value ? "Y" : "N");
+
+	return simple_read_from_buffer(ubuf, size, off, buf, count);
+}
+
+static const struct file_operations kbase_force_same_va_fops = {
+	.open = simple_open,
+	.write = write_ctx_force_same_va,
+	.read = read_ctx_force_same_va,
+};
+
+static int kbase_open(struct inode *inode, struct file *filp)
+{
+	struct kbase_device *kbdev = NULL;
+	struct kbase_context *kctx;
+	int ret = 0;
+#ifdef CONFIG_DEBUG_FS
+	char kctx_name[64];
+#endif
+
+	kbdev = kbase_find_device(iminor(inode));
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE)
+	kctx = kbase_create_context(kbdev, in_compat_syscall());
+#else
+	kctx = kbase_create_context(kbdev, is_compat_task());
+#endif /* (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE) */
+	if (!kctx) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	init_waitqueue_head(&kctx->event_queue);
+	filp->private_data = kctx;
+	filp->f_mode |= FMODE_UNSIGNED_OFFSET;
+	kctx->filp = filp;
+
+	if (kbdev->infinite_cache_active_default)
+		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
+
+#ifdef CONFIG_DEBUG_FS
+	snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
+
+	kctx->kctx_dentry = debugfs_create_dir(kctx_name,
+			kbdev->debugfs_ctx_directory);
+
+	if (IS_ERR_OR_NULL(kctx->kctx_dentry)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry,
+			kctx, &kbase_infinite_cache_fops);
+	debugfs_create_file("force_same_va", S_IRUSR | S_IWUSR,
+			kctx->kctx_dentry, kctx, &kbase_force_same_va_fops);
+
+	mutex_init(&kctx->mem_profile_lock);
+
+	kbasep_jd_debugfs_ctx_init(kctx);
+	kbase_debug_mem_view_init(filp);
+
+	kbase_debug_job_fault_context_init(kctx);
+
+	kbase_mem_pool_debugfs_init(kctx->kctx_dentry, &kctx->mem_pool, &kctx->lp_mem_pool);
+
+	kbase_jit_debugfs_init(kctx);
+#endif /* CONFIG_DEBUG_FS */
+
+	dev_dbg(kbdev->dev, "created base context\n");
+
+	{
+		struct kbasep_kctx_list_element *element;
+
+		element = kzalloc(sizeof(*element), GFP_KERNEL);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			element->kctx = kctx;
+			list_add(&element->link, &kbdev->kctx_list);
+			KBASE_TLSTREAM_TL_NEW_CTX(
+					element->kctx,
+					element->kctx->id,
+					(u32)(element->kctx->tgid));
+			mutex_unlock(&kbdev->kctx_list_lock);
+		} else {
+			/* we don't treat this as a fail - just warn about it */
+			dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n");
+		}
+	}
+	return 0;
+
+ out:
+	kbase_release_device(kbdev);
+	return ret;
+}
+
+static int kbase_release(struct inode *inode, struct file *filp)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_kctx_list_element *element, *tmp;
+	bool found_element = false;
+
+	KBASE_TLSTREAM_TL_DEL_CTX(kctx);
+
+#ifdef CONFIG_DEBUG_FS
+	kbasep_mem_profile_debugfs_remove(kctx);
+	kbase_debug_job_fault_context_term(kctx);
+#endif
+
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found_element = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+	if (!found_element)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	filp->private_data = NULL;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	/* If this client was performing hwcnt dumping and did not explicitly
+	 * detach itself, remove it from the vinstr core now */
+	if (kctx->vinstr_cli) {
+		struct kbase_ioctl_hwcnt_enable enable;
+
+		enable.dump_buffer = 0llu;
+		kbase_vinstr_legacy_hwc_setup(
+				kbdev->vinstr_ctx, &kctx->vinstr_cli, &enable);
+	}
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	kbase_destroy_context(kctx);
+
+	dev_dbg(kbdev->dev, "deleted base context\n");
+	kbase_release_device(kbdev);
+	return 0;
+}
+
+static int kbase_api_set_flags(struct kbase_context *kctx,
+		struct kbase_ioctl_set_flags *flags)
+{
+	int err;
+
+	/* setup pending, try to signal that we'll do the setup,
+	 * if setup was already in progress, err this call
+	 */
+	if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1) != 0)
+		return -EINVAL;
+
+	err = kbase_context_set_create_flags(kctx, flags->create_flags);
+	/* if bad flags, will stay stuck in setup mode */
+	if (err)
+		return err;
+
+	atomic_set(&kctx->setup_complete, 1);
+	return 0;
+}
+
+static int kbase_api_job_submit(struct kbase_context *kctx,
+		struct kbase_ioctl_job_submit *submit)
+{
+	return kbase_jd_submit(kctx, u64_to_user_ptr(submit->addr),
+			submit->nr_atoms,
+			submit->stride, false);
+}
+
+static int kbase_api_get_gpuprops(struct kbase_context *kctx,
+		struct kbase_ioctl_get_gpuprops *get_props)
+{
+	struct kbase_gpu_props *kprops = &kctx->kbdev->gpu_props;
+	int err;
+
+	if (get_props->flags != 0) {
+		dev_err(kctx->kbdev->dev, "Unsupported flags to get_gpuprops");
+		return -EINVAL;
+	}
+
+	if (get_props->size == 0)
+		return kprops->prop_buffer_size;
+	if (get_props->size < kprops->prop_buffer_size)
+		return -EINVAL;
+
+	err = copy_to_user(u64_to_user_ptr(get_props->buffer),
+			kprops->prop_buffer,
+			kprops->prop_buffer_size);
+	if (err)
+		return -EFAULT;
+	return kprops->prop_buffer_size;
+}
+
+static int kbase_api_post_term(struct kbase_context *kctx)
+{
+	kbase_event_close(kctx);
+	return 0;
+}
+
+static int kbase_api_mem_alloc(struct kbase_context *kctx,
+		union kbase_ioctl_mem_alloc *alloc)
+{
+	struct kbase_va_region *reg;
+	u64 flags = alloc->in.flags;
+	u64 gpu_va;
+
+	rcu_read_lock();
+	/* Don't allow memory allocation until user space has set up the
+	 * tracking page (which sets kctx->process_mm). Also catches when we've
+	 * forked.
+	 */
+	if (rcu_dereference(kctx->process_mm) != current->mm) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+	rcu_read_unlock();
+
+	if (flags & BASE_MEM_FLAGS_KERNEL_ONLY)
+		return -ENOMEM;
+
+	if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) &&
+			kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) {
+		/* force SAME_VA if a 64-bit client */
+		flags |= BASE_MEM_SAME_VA;
+	}
+
+
+	reg = kbase_mem_alloc(kctx, alloc->in.va_pages,
+			alloc->in.commit_pages,
+			alloc->in.extent,
+			&flags, &gpu_va);
+
+	if (!reg)
+		return -ENOMEM;
+
+	alloc->out.flags = flags;
+	alloc->out.gpu_va = gpu_va;
+
+	return 0;
+}
+
+static int kbase_api_mem_query(struct kbase_context *kctx,
+		union kbase_ioctl_mem_query *query)
+{
+	return kbase_mem_query(kctx, query->in.gpu_addr,
+			query->in.query, &query->out.value);
+}
+
+static int kbase_api_mem_free(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_free *free)
+{
+	return kbase_mem_free(kctx, free->gpu_addr);
+}
+
+static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_reader_setup *setup)
+{
+	int ret;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	ret = kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup);
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	return ret;
+}
+
+static int kbase_api_hwcnt_enable(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_enable *enable)
+{
+	int ret;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	ret = kbase_vinstr_legacy_hwc_setup(kctx->kbdev->vinstr_ctx,
+			&kctx->vinstr_cli, enable);
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	return ret;
+}
+
+static int kbase_api_hwcnt_dump(struct kbase_context *kctx)
+{
+	int ret;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	ret = kbase_vinstr_hwc_dump(kctx->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL);
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	return ret;
+}
+
+static int kbase_api_hwcnt_clear(struct kbase_context *kctx)
+{
+	int ret;
+
+	mutex_lock(&kctx->vinstr_cli_lock);
+	ret = kbase_vinstr_hwc_clear(kctx->vinstr_cli);
+	mutex_unlock(&kctx->vinstr_cli_lock);
+
+	return ret;
+}
+
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_api_hwcnt_set(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_values *values)
+{
+	gpu_model_set_dummy_prfcnt_sample(
+			(u32 __user *)(uintptr_t)values->data,
+			values->size);
+
+	return 0;
+}
+#endif
+
+static int kbase_api_disjoint_query(struct kbase_context *kctx,
+		struct kbase_ioctl_disjoint_query *query)
+{
+	query->counter = kbase_disjoint_event_get(kctx->kbdev);
+
+	return 0;
+}
+
+static int kbase_api_get_ddk_version(struct kbase_context *kctx,
+		struct kbase_ioctl_get_ddk_version *version)
+{
+	int ret;
+	int len = sizeof(KERNEL_SIDE_DDK_VERSION_STRING);
+
+	if (version->version_buffer == 0)
+		return len;
+
+	if (version->size < len)
+		return -EOVERFLOW;
+
+	ret = copy_to_user(u64_to_user_ptr(version->version_buffer),
+			KERNEL_SIDE_DDK_VERSION_STRING,
+			sizeof(KERNEL_SIDE_DDK_VERSION_STRING));
+
+	if (ret)
+		return -EFAULT;
+
+	return len;
+}
+
+/* Defaults for legacy JIT init ioctl */
+#define DEFAULT_MAX_JIT_ALLOCATIONS 255
+#define JIT_LEGACY_TRIM_LEVEL (0) /* No trimming */
+
+static int kbase_api_mem_jit_init_old(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_jit_init_old *jit_init)
+{
+	kctx->jit_version = 1;
+
+	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
+			DEFAULT_MAX_JIT_ALLOCATIONS,
+			JIT_LEGACY_TRIM_LEVEL);
+}
+
+static int kbase_api_mem_jit_init(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_jit_init *jit_init)
+{
+	int i;
+
+	kctx->jit_version = 2;
+
+	for (i = 0; i < sizeof(jit_init->padding); i++) {
+		/* Ensure all padding bytes are 0 for potential future
+		 * extension
+		 */
+		if (jit_init->padding[i])
+			return -EINVAL;
+	}
+
+	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
+			jit_init->max_allocations, jit_init->trim_level);
+}
+
+static int kbase_api_mem_sync(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_sync *sync)
+{
+	struct basep_syncset sset = {
+		.mem_handle.basep.handle = sync->handle,
+		.user_addr = sync->user_addr,
+		.size = sync->size,
+		.type = sync->type
+	};
+
+	return kbase_sync_now(kctx, &sset);
+}
+
+static int kbase_api_mem_find_cpu_offset(struct kbase_context *kctx,
+		union kbase_ioctl_mem_find_cpu_offset *find)
+{
+	return kbasep_find_enclosing_cpu_mapping_offset(
+			kctx,
+			find->in.cpu_addr,
+			find->in.size,
+			&find->out.offset);
+}
+
+static int kbase_api_mem_find_gpu_start_and_offset(struct kbase_context *kctx,
+		union kbase_ioctl_mem_find_gpu_start_and_offset *find)
+{
+	return kbasep_find_enclosing_gpu_mapping_start_and_offset(
+			kctx,
+			find->in.gpu_addr,
+			find->in.size,
+			&find->out.start,
+			&find->out.offset);
+}
+
+static int kbase_api_get_context_id(struct kbase_context *kctx,
+		struct kbase_ioctl_get_context_id *info)
+{
+	info->id = kctx->id;
+
+	return 0;
+}
+
+static int kbase_api_tlstream_acquire(struct kbase_context *kctx,
+		struct kbase_ioctl_tlstream_acquire *acquire)
+{
+	return kbase_tlstream_acquire(kctx, acquire->flags);
+}
+
+static int kbase_api_tlstream_flush(struct kbase_context *kctx)
+{
+	kbase_tlstream_flush_streams();
+
+	return 0;
+}
+
+static int kbase_api_mem_commit(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_commit *commit)
+{
+	return kbase_mem_commit(kctx, commit->gpu_addr, commit->pages);
+}
+
+static int kbase_api_mem_alias(struct kbase_context *kctx,
+		union kbase_ioctl_mem_alias *alias)
+{
+	struct base_mem_aliasing_info *ai;
+	u64 flags;
+	int err;
+
+	if (alias->in.nents == 0 || alias->in.nents > 2048)
+		return -EINVAL;
+
+	if (alias->in.stride > (U64_MAX / 2048))
+		return -EINVAL;
+
+	ai = vmalloc(sizeof(*ai) * alias->in.nents);
+	if (!ai)
+		return -ENOMEM;
+
+	err = copy_from_user(ai,
+			u64_to_user_ptr(alias->in.aliasing_info),
+			sizeof(*ai) * alias->in.nents);
+	if (err) {
+		vfree(ai);
+		return -EFAULT;
+	}
+
+	flags = alias->in.flags;
+	if (flags & BASE_MEM_FLAGS_KERNEL_ONLY) {
+		vfree(ai);
+		return -EINVAL;
+	}
+
+	alias->out.gpu_va = kbase_mem_alias(kctx, &flags,
+			alias->in.stride, alias->in.nents,
+			ai, &alias->out.va_pages);
+
+	alias->out.flags = flags;
+
+	vfree(ai);
+
+	if (alias->out.gpu_va == 0)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int kbase_api_mem_import(struct kbase_context *kctx,
+		union kbase_ioctl_mem_import *import)
+{
+	int ret;
+	u64 flags = import->in.flags;
+
+	if (flags & BASE_MEM_FLAGS_KERNEL_ONLY)
+		return -ENOMEM;
+
+	ret = kbase_mem_import(kctx,
+			import->in.type,
+			u64_to_user_ptr(import->in.phandle),
+			import->in.padding,
+			&import->out.gpu_va,
+			&import->out.va_pages,
+			&flags);
+
+	import->out.flags = flags;
+
+	return ret;
+}
+
+static int kbase_api_mem_flags_change(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_flags_change *change)
+{
+	if (change->flags & BASE_MEM_FLAGS_KERNEL_ONLY)
+		return -ENOMEM;
+
+	return kbase_mem_flags_change(kctx, change->gpu_va,
+			change->flags, change->mask);
+}
+
+static int kbase_api_stream_create(struct kbase_context *kctx,
+		struct kbase_ioctl_stream_create *stream)
+{
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	int fd, ret;
+
+	/* Name must be NULL-terminated and padded with NULLs, so check last
+	 * character is NULL
+	 */
+	if (stream->name[sizeof(stream->name)-1] != 0)
+		return -EINVAL;
+
+	ret = kbase_sync_fence_stream_create(stream->name, &fd);
+
+	if (ret)
+		return ret;
+	return fd;
+#else
+	return -ENOENT;
+#endif
+}
+
+static int kbase_api_fence_validate(struct kbase_context *kctx,
+		struct kbase_ioctl_fence_validate *validate)
+{
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	return kbase_sync_fence_validate(validate->fd);
+#else
+	return -ENOENT;
+#endif
+}
+
+static int kbase_api_get_profiling_controls(struct kbase_context *kctx,
+		struct kbase_ioctl_get_profiling_controls *controls)
+{
+	int ret;
+
+	if (controls->count > (FBDUMP_CONTROL_MAX - FBDUMP_CONTROL_MIN))
+		return -EINVAL;
+
+	ret = copy_to_user(u64_to_user_ptr(controls->buffer),
+			&kctx->kbdev->kbase_profiling_controls[
+				FBDUMP_CONTROL_MIN],
+			controls->count * sizeof(u32));
+
+	if (ret)
+		return -EFAULT;
+	return 0;
+}
+
+static int kbase_api_mem_profile_add(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_profile_add *data)
+{
+	char *buf;
+	int err;
+
+	if (data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) {
+		dev_err(kctx->kbdev->dev, "mem_profile_add: buffer too big\n");
+		return -EINVAL;
+	}
+
+	buf = kmalloc(data->len, GFP_KERNEL);
+	if (ZERO_OR_NULL_PTR(buf))
+		return -ENOMEM;
+
+	err = copy_from_user(buf, u64_to_user_ptr(data->buffer),
+			data->len);
+	if (err) {
+		kfree(buf);
+		return -EFAULT;
+	}
+
+	return kbasep_mem_profile_debugfs_insert(kctx, buf, data->len);
+}
+
+static int kbase_api_soft_event_update(struct kbase_context *kctx,
+		struct kbase_ioctl_soft_event_update *update)
+{
+	if (update->flags != 0)
+		return -EINVAL;
+
+	return kbase_soft_event_update(kctx, update->event, update->new_status);
+}
+
+static int kbase_api_sticky_resource_map(struct kbase_context *kctx,
+		struct kbase_ioctl_sticky_resource_map *map)
+{
+	int ret;
+	u64 i;
+	u64 gpu_addr[BASE_EXT_RES_COUNT_MAX];
+
+	if (!map->count || map->count > BASE_EXT_RES_COUNT_MAX)
+		return -EOVERFLOW;
+
+	ret = copy_from_user(gpu_addr, u64_to_user_ptr(map->address),
+			sizeof(u64) * map->count);
+
+	if (ret != 0)
+		return -EFAULT;
+
+	kbase_gpu_vm_lock(kctx);
+
+	for (i = 0; i < map->count; i++) {
+		if (!kbase_sticky_resource_acquire(kctx, gpu_addr[i])) {
+			/* Invalid resource */
+			ret = -EINVAL;
+			break;
+		}
+	}
+
+	if (ret != 0) {
+		while (i > 0) {
+			i--;
+			kbase_sticky_resource_release(kctx, NULL, gpu_addr[i]);
+		}
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return ret;
+}
+
+static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx,
+		struct kbase_ioctl_sticky_resource_unmap *unmap)
+{
+	int ret;
+	u64 i;
+	u64 gpu_addr[BASE_EXT_RES_COUNT_MAX];
+
+	if (!unmap->count || unmap->count > BASE_EXT_RES_COUNT_MAX)
+		return -EOVERFLOW;
+
+	ret = copy_from_user(gpu_addr, u64_to_user_ptr(unmap->address),
+			sizeof(u64) * unmap->count);
+
+	if (ret != 0)
+		return -EFAULT;
+
+	kbase_gpu_vm_lock(kctx);
+
+	for (i = 0; i < unmap->count; i++) {
+		if (!kbase_sticky_resource_release(kctx, NULL, gpu_addr[i])) {
+			/* Invalid resource, but we keep going anyway */
+			ret = -EINVAL;
+		}
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return ret;
+}
+
+#if MALI_UNIT_TEST
+static int kbase_api_tlstream_test(struct kbase_context *kctx,
+		struct kbase_ioctl_tlstream_test *test)
+{
+	kbase_tlstream_test(
+			test->tpw_count,
+			test->msg_delay,
+			test->msg_count,
+			test->aux_msg);
+
+	return 0;
+}
+
+static int kbase_api_tlstream_stats(struct kbase_context *kctx,
+		struct kbase_ioctl_tlstream_stats *stats)
+{
+	kbase_tlstream_stats(
+			&stats->bytes_collected,
+			&stats->bytes_generated);
+
+	return 0;
+}
+#endif /* MALI_UNIT_TEST */
+
+
+#define KBASE_HANDLE_IOCTL(cmd, function)                          \
+	do {                                                       \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE);          \
+		return function(kctx);                             \
+	} while (0)
+
+#define KBASE_HANDLE_IOCTL_IN(cmd, function, type)                 \
+	do {                                                       \
+		type param;                                        \
+		int err;                                           \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE);         \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));     \
+		err = copy_from_user(&param, uarg, sizeof(param)); \
+		if (err)                                           \
+			return -EFAULT;                            \
+		return function(kctx, &param);                     \
+	} while (0)
+
+#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type)                \
+	do {                                                       \
+		type param;                                        \
+		int ret, err;                                      \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ);          \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));     \
+		ret = function(kctx, &param);                      \
+		err = copy_to_user(uarg, &param, sizeof(param));   \
+		if (err)                                           \
+			return -EFAULT;                            \
+		return ret;                                        \
+	} while (0)
+
+#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type)                  \
+	do {                                                           \
+		type param;                                            \
+		int ret, err;                                          \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE|_IOC_READ)); \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));         \
+		err = copy_from_user(&param, uarg, sizeof(param));     \
+		if (err)                                               \
+			return -EFAULT;                                \
+		ret = function(kctx, &param);                          \
+		err = copy_to_user(uarg, &param, sizeof(param));       \
+		if (err)                                               \
+			return -EFAULT;                                \
+		return ret;                                            \
+	} while (0)
+
+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct kbase_device *kbdev = kctx->kbdev;
+	void __user *uarg = (void __user *)arg;
+
+	/* Only these ioctls are available until setup is complete */
+	switch (cmd) {
+	case KBASE_IOCTL_VERSION_CHECK:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK,
+				kbase_api_handshake,
+				struct kbase_ioctl_version_check);
+		break;
+
+	case KBASE_IOCTL_SET_FLAGS:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS,
+				kbase_api_set_flags,
+				struct kbase_ioctl_set_flags);
+		break;
+	}
+
+	/* Block call until version handshake and setup is complete */
+	if (kctx->api_version == 0 || !atomic_read(&kctx->setup_complete))
+		return -EINVAL;
+
+	/* Normal ioctls */
+	switch (cmd) {
+	case KBASE_IOCTL_JOB_SUBMIT:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT,
+				kbase_api_job_submit,
+				struct kbase_ioctl_job_submit);
+		break;
+	case KBASE_IOCTL_GET_GPUPROPS:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS,
+				kbase_api_get_gpuprops,
+				struct kbase_ioctl_get_gpuprops);
+		break;
+	case KBASE_IOCTL_POST_TERM:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM,
+				kbase_api_post_term);
+		break;
+	case KBASE_IOCTL_MEM_ALLOC:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC,
+				kbase_api_mem_alloc,
+				union kbase_ioctl_mem_alloc);
+		break;
+	case KBASE_IOCTL_MEM_QUERY:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY,
+				kbase_api_mem_query,
+				union kbase_ioctl_mem_query);
+		break;
+	case KBASE_IOCTL_MEM_FREE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FREE,
+				kbase_api_mem_free,
+				struct kbase_ioctl_mem_free);
+		break;
+	case KBASE_IOCTL_DISJOINT_QUERY:
+		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_DISJOINT_QUERY,
+				kbase_api_disjoint_query,
+				struct kbase_ioctl_disjoint_query);
+		break;
+	case KBASE_IOCTL_GET_DDK_VERSION:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_DDK_VERSION,
+				kbase_api_get_ddk_version,
+				struct kbase_ioctl_get_ddk_version);
+		break;
+	case KBASE_IOCTL_MEM_JIT_INIT_OLD:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_OLD,
+				kbase_api_mem_jit_init_old,
+				struct kbase_ioctl_mem_jit_init_old);
+		break;
+	case KBASE_IOCTL_MEM_JIT_INIT:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT,
+				kbase_api_mem_jit_init,
+				struct kbase_ioctl_mem_jit_init);
+		break;
+	case KBASE_IOCTL_MEM_SYNC:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC,
+				kbase_api_mem_sync,
+				struct kbase_ioctl_mem_sync);
+		break;
+	case KBASE_IOCTL_MEM_FIND_CPU_OFFSET:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_CPU_OFFSET,
+				kbase_api_mem_find_cpu_offset,
+				union kbase_ioctl_mem_find_cpu_offset);
+		break;
+	case KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET,
+				kbase_api_mem_find_gpu_start_and_offset,
+				union kbase_ioctl_mem_find_gpu_start_and_offset);
+		break;
+	case KBASE_IOCTL_GET_CONTEXT_ID:
+		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_GET_CONTEXT_ID,
+				kbase_api_get_context_id,
+				struct kbase_ioctl_get_context_id);
+		break;
+	case KBASE_IOCTL_TLSTREAM_ACQUIRE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_ACQUIRE,
+				kbase_api_tlstream_acquire,
+				struct kbase_ioctl_tlstream_acquire);
+		break;
+	case KBASE_IOCTL_TLSTREAM_FLUSH:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_TLSTREAM_FLUSH,
+				kbase_api_tlstream_flush);
+		break;
+	case KBASE_IOCTL_MEM_COMMIT:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_COMMIT,
+				kbase_api_mem_commit,
+				struct kbase_ioctl_mem_commit);
+		break;
+	case KBASE_IOCTL_MEM_ALIAS:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALIAS,
+				kbase_api_mem_alias,
+				union kbase_ioctl_mem_alias);
+		break;
+	case KBASE_IOCTL_MEM_IMPORT:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_IMPORT,
+				kbase_api_mem_import,
+				union kbase_ioctl_mem_import);
+		break;
+	case KBASE_IOCTL_MEM_FLAGS_CHANGE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FLAGS_CHANGE,
+				kbase_api_mem_flags_change,
+				struct kbase_ioctl_mem_flags_change);
+		break;
+	case KBASE_IOCTL_STREAM_CREATE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STREAM_CREATE,
+				kbase_api_stream_create,
+				struct kbase_ioctl_stream_create);
+		break;
+	case KBASE_IOCTL_FENCE_VALIDATE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_FENCE_VALIDATE,
+				kbase_api_fence_validate,
+				struct kbase_ioctl_fence_validate);
+		break;
+	case KBASE_IOCTL_GET_PROFILING_CONTROLS:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_PROFILING_CONTROLS,
+				kbase_api_get_profiling_controls,
+				struct kbase_ioctl_get_profiling_controls);
+		break;
+	case KBASE_IOCTL_MEM_PROFILE_ADD:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD,
+				kbase_api_mem_profile_add,
+				struct kbase_ioctl_mem_profile_add);
+		break;
+	case KBASE_IOCTL_SOFT_EVENT_UPDATE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE,
+				kbase_api_soft_event_update,
+				struct kbase_ioctl_soft_event_update);
+		break;
+	case KBASE_IOCTL_STICKY_RESOURCE_MAP:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_MAP,
+				kbase_api_sticky_resource_map,
+				struct kbase_ioctl_sticky_resource_map);
+		break;
+	case KBASE_IOCTL_STICKY_RESOURCE_UNMAP:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_UNMAP,
+				kbase_api_sticky_resource_unmap,
+				struct kbase_ioctl_sticky_resource_unmap);
+		break;
+
+	/* Instrumentation. */
+	case KBASE_IOCTL_HWCNT_READER_SETUP:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP,
+				kbase_api_hwcnt_reader_setup,
+				struct kbase_ioctl_hwcnt_reader_setup);
+		break;
+	case KBASE_IOCTL_HWCNT_ENABLE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE,
+				kbase_api_hwcnt_enable,
+				struct kbase_ioctl_hwcnt_enable);
+		break;
+	case KBASE_IOCTL_HWCNT_DUMP:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP,
+				kbase_api_hwcnt_dump);
+		break;
+	case KBASE_IOCTL_HWCNT_CLEAR:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR,
+				kbase_api_hwcnt_clear);
+		break;
+#ifdef CONFIG_MALI_NO_MALI
+	case KBASE_IOCTL_HWCNT_SET:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET,
+				kbase_api_hwcnt_set,
+				struct kbase_ioctl_hwcnt_values);
+		break;
+#endif
+#ifdef CONFIG_MALI_JOB_DUMP
+	case KBASE_IOCTL_CINSTR_GWT_START:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START,
+				kbase_gpu_gwt_start);
+		break;
+	case KBASE_IOCTL_CINSTR_GWT_STOP:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_STOP,
+				kbase_gpu_gwt_stop);
+		break;
+	case KBASE_IOCTL_CINSTR_GWT_DUMP:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CINSTR_GWT_DUMP,
+				kbase_gpu_gwt_dump,
+				union kbase_ioctl_cinstr_gwt_dump);
+		break;
+#endif
+#if MALI_UNIT_TEST
+	case KBASE_IOCTL_TLSTREAM_TEST:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST,
+				kbase_api_tlstream_test,
+				struct kbase_ioctl_tlstream_test);
+		break;
+	case KBASE_IOCTL_TLSTREAM_STATS:
+		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS,
+				kbase_api_tlstream_stats,
+				struct kbase_ioctl_tlstream_stats);
+		break;
+#endif
+	}
+
+	dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd));
+
+	return -ENOIOCTLCMD;
+}
+
+static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct base_jd_event_v2 uevent;
+	int out_count = 0;
+
+	if (count < sizeof(uevent))
+		return -ENOBUFS;
+
+	do {
+		while (kbase_event_dequeue(kctx, &uevent)) {
+			if (out_count > 0)
+				goto out;
+
+			if (filp->f_flags & O_NONBLOCK)
+				return -EAGAIN;
+
+			if (wait_event_interruptible(kctx->event_queue,
+					kbase_event_pending(kctx)) != 0)
+				return -ERESTARTSYS;
+		}
+		if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) {
+			if (out_count == 0)
+				return -EPIPE;
+			goto out;
+		}
+
+		if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0)
+			return -EFAULT;
+
+		buf += sizeof(uevent);
+		out_count++;
+		count -= sizeof(uevent);
+	} while (count >= sizeof(uevent));
+
+ out:
+	return out_count * sizeof(uevent);
+}
+
+static unsigned int kbase_poll(struct file *filp, poll_table *wait)
+{
+	struct kbase_context *kctx = filp->private_data;
+
+	poll_wait(filp, &kctx->event_queue, wait);
+	if (kbase_event_pending(kctx))
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+void kbase_event_wakeup(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	wake_up_interruptible(&kctx->event_queue);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_wakeup);
+
+static int kbase_check_flags(int flags)
+{
+	/* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always
+	 * closes the file descriptor in a child process.
+	 */
+	if (0 == (flags & O_CLOEXEC))
+		return -EINVAL;
+
+	return 0;
+}
+
+static const struct file_operations kbase_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_open,
+	.release = kbase_release,
+	.read = kbase_read,
+	.poll = kbase_poll,
+	.unlocked_ioctl = kbase_ioctl,
+	.compat_ioctl = kbase_ioctl,
+	.mmap = kbase_mmap,
+	.check_flags = kbase_check_flags,
+	.get_unmapped_area = kbase_get_unmapped_area,
+};
+
+/**
+ * show_policy - Show callback for the power_policy sysfs file.
+ *
+ * This function is called to get the contents of the power_policy sysfs
+ * file. This is a list of the available policies with the currently active one
+ * surrounded by square brackets.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *current_policy;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_get_policy(kbdev);
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/**
+ * set_policy - Store callback for the power_policy sysfs file.
+ *
+ * This function is called when the power_policy sysfs file is written to.
+ * It matches the requested policy against the available policies and if a
+ * matching policy is found calls kbase_pm_set_policy() to change the
+ * policy.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_list_policies(&policy_list);
+
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "power_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/*
+ * The sysfs file power_policy.
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
+
+/*
+ * show_core_mask - Show callback for the core_mask sysfs file.
+ *
+ * This function is called to get the contents of the core_mask sysfs file.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS0) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[0]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS1) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[1]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS2) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[2]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Available core mask : 0x%llX\n",
+			kbdev->gpu_props.props.raw_props.shader_present);
+
+	return ret;
+}
+
+/**
+ * set_core_mask - Store callback for the core_mask sysfs file.
+ *
+ * This function is called when the core_mask sysfs file is written to.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	u64 new_core_mask[3];
+	int items;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llx %llx %llx",
+			&new_core_mask[0], &new_core_mask[1],
+			&new_core_mask[2]);
+
+	if (items == 1)
+		new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
+
+	if (items == 1 || items == 3) {
+		u64 shader_present =
+				kbdev->gpu_props.props.raw_props.shader_present;
+		u64 group0_core_mask =
+				kbdev->gpu_props.props.coherency_info.group[0].
+				core_mask;
+
+		if ((new_core_mask[0] & shader_present) != new_core_mask[0] ||
+				!(new_core_mask[0] & group0_core_mask) ||
+			(new_core_mask[1] & shader_present) !=
+						new_core_mask[1] ||
+				!(new_core_mask[1] & group0_core_mask) ||
+			(new_core_mask[2] & shader_present) !=
+						new_core_mask[2] ||
+				!(new_core_mask[2] & group0_core_mask)) {
+			dev_err(dev, "power_policy: invalid core specification\n");
+			return -EINVAL;
+		}
+
+		if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] ||
+				kbdev->pm.debug_core_mask[1] !=
+						new_core_mask[1] ||
+				kbdev->pm.debug_core_mask[2] !=
+						new_core_mask[2]) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
+					new_core_mask[1], new_core_mask[2]);
+
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n"
+		"Use format <core_mask>\n"
+		"or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
+	return -EINVAL;
+}
+
+/*
+ * The sysfs file core_mask.
+ *
+ * This is used to restrict shader core availability for debugging purposes.
+ * Reading it will show the current core mask and the mask of cores available.
+ * Writing to it will set the current core mask.
+ */
+static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
+
+/**
+ * set_soft_job_timeout - Store callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This allows setting the timeout for software jobs. Waiting soft event wait
+ * jobs will be cancelled after this period expires, while soft fence wait jobs
+ * will print debug information if the fence debug feature is enabled.
+ *
+ * This is expressed in milliseconds.
+ *
+ * Return: count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_soft_job_timeout(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int soft_job_timeout_ms;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) ||
+	    (soft_job_timeout_ms <= 0))
+		return -EINVAL;
+
+	atomic_set(&kbdev->js_data.soft_job_timeout_ms,
+		   soft_job_timeout_ms);
+
+	return count;
+}
+
+/**
+ * show_soft_job_timeout - Show callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * This will return the timeout for the software jobs.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the sysfs file contents.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_soft_job_timeout(struct device *dev,
+				       struct device_attribute *attr,
+				       char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n",
+			 atomic_read(&kbdev->js_data.soft_job_timeout_ms));
+}
+
+static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR,
+		   show_soft_job_timeout, set_soft_job_timeout);
+
+static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms,
+				int default_ticks, u32 old_ticks)
+{
+	if (timeout_ms > 0) {
+		u64 ticks = timeout_ms * 1000000ULL;
+		do_div(ticks, kbdev->js_data.scheduling_period_ns);
+		if (!ticks)
+			return 1;
+		return ticks;
+	} else if (timeout_ms < 0) {
+		return default_ticks;
+	} else {
+		return old_ticks;
+	}
+}
+
+/**
+ * set_js_timeouts - Store callback for the js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the js_timeouts sysfs
+ * file. This file contains five values separated by whitespace. The values
+ * are basically the same as %JS_SOFT_STOP_TICKS, %JS_HARD_STOP_TICKS_SS,
+ * %JS_HARD_STOP_TICKS_DUMPING, %JS_RESET_TICKS_SS, %JS_RESET_TICKS_DUMPING
+ * configuration values (in that order), with the difference that the js_timeout
+ * values are expressed in MILLISECONDS.
+ *
+ * The js_timeouts sysfile file allows the current values in
+ * use by the job scheduler to get override. Note that a value needs to
+ * be other than 0 for it to override the current job scheduler value.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	long js_soft_stop_ms;
+	long js_soft_stop_ms_cl;
+	long js_hard_stop_ms_ss;
+	long js_hard_stop_ms_cl;
+	long js_hard_stop_ms_dumping;
+	long js_reset_ms_ss;
+	long js_reset_ms_cl;
+	long js_reset_ms_dumping;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld",
+			&js_soft_stop_ms, &js_soft_stop_ms_cl,
+			&js_hard_stop_ms_ss, &js_hard_stop_ms_cl,
+			&js_hard_stop_ms_dumping, &js_reset_ms_ss,
+			&js_reset_ms_cl, &js_reset_ms_dumping);
+
+	if (items == 8) {
+		struct kbasep_js_device_data *js_data = &kbdev->js_data;
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\
+	js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \
+			default, js_data->ticks_name); \
+	dev_dbg(kbdev->dev, "Overriding " #ticks_name \
+			" with %lu ticks (%lu ms)\n", \
+			(unsigned long)js_data->ticks_name, \
+			ms_name); \
+	} while (0)
+
+		UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms,
+				DEFAULT_JS_SOFT_STOP_TICKS);
+		UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl,
+				DEFAULT_JS_SOFT_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_HARD_STOP_TICKS_SS_8408 :
+				DEFAULT_JS_HARD_STOP_TICKS_SS);
+		UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl,
+				DEFAULT_JS_HARD_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_dumping,
+				js_hard_stop_ms_dumping,
+				DEFAULT_JS_HARD_STOP_TICKS_DUMPING);
+		UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_RESET_TICKS_SS_8408 :
+				DEFAULT_JS_RESET_TICKS_SS);
+		UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl,
+				DEFAULT_JS_RESET_TICKS_CL);
+		UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping,
+				DEFAULT_JS_RESET_TICKS_DUMPING);
+
+		kbase_js_set_timeouts(kbdev);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		return count;
+	}
+
+	dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n"
+			"Use format <soft_stop_ms> <soft_stop_ms_cl> <hard_stop_ms_ss> <hard_stop_ms_cl> <hard_stop_ms_dumping> <reset_ms_ss> <reset_ms_cl> <reset_ms_dumping>\n"
+			"Write 0 for no change, -1 to restore default timeout\n");
+	return -EINVAL;
+}
+
+static unsigned long get_js_timeout_in_ms(
+		u32 scheduling_period_ns,
+		u32 ticks)
+{
+	u64 ms = (u64)ticks * scheduling_period_ns;
+
+	do_div(ms, 1000000UL);
+	return ms;
+}
+
+/**
+ * show_js_timeouts - Show callback for the js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the js_timeouts sysfs
+ * file. It returns the last set values written to the js_timeouts sysfs file.
+ * If the file didn't get written yet, the values will be current setting in
+ * use.
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+	unsigned long js_soft_stop_ms;
+	unsigned long js_soft_stop_ms_cl;
+	unsigned long js_hard_stop_ms_ss;
+	unsigned long js_hard_stop_ms_cl;
+	unsigned long js_hard_stop_ms_dumping;
+	unsigned long js_reset_ms_ss;
+	unsigned long js_reset_ms_cl;
+	unsigned long js_reset_ms_dumping;
+	u32 scheduling_period_ns;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	scheduling_period_ns = kbdev->js_data.scheduling_period_ns;
+
+#define GET_TIMEOUT(name) get_js_timeout_in_ms(\
+		scheduling_period_ns, \
+		kbdev->js_data.name)
+
+	js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks);
+	js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl);
+	js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss);
+	js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl);
+	js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping);
+	js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss);
+	js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl);
+	js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping);
+
+#undef GET_TIMEOUT
+
+	ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n",
+			js_soft_stop_ms, js_soft_stop_ms_cl,
+			js_hard_stop_ms_ss, js_hard_stop_ms_cl,
+			js_hard_stop_ms_dumping, js_reset_ms_ss,
+			js_reset_ms_cl, js_reset_ms_dumping);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * The sysfs file js_timeouts.
+ *
+ * This is used to override the current job scheduler values for
+ * JS_STOP_STOP_TICKS_SS
+ * JS_STOP_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_SS
+ * JS_HARD_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_DUMPING
+ * JS_RESET_TICKS_SS
+ * JS_RESET_TICKS_CL
+ * JS_RESET_TICKS_DUMPING.
+ */
+static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts);
+
+static u32 get_new_js_timeout(
+		u32 old_period,
+		u32 old_ticks,
+		u32 new_scheduling_period_ns)
+{
+	u64 ticks = (u64)old_period * (u64)old_ticks;
+	do_div(ticks, new_scheduling_period_ns);
+	return ticks?ticks:1;
+}
+
+/**
+ * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs
+ *                            file
+ * @dev:   The device the sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the js_scheduling_period sysfs file is written
+ * to. It checks the data written, and if valid updates the js_scheduling_period
+ * value
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	unsigned int js_scheduling_period;
+	u32 new_scheduling_period_ns;
+	u32 old_period;
+	struct kbasep_js_device_data *js_data;
+	unsigned long flags;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	js_data = &kbdev->js_data;
+
+	ret = kstrtouint(buf, 0, &js_scheduling_period);
+	if (ret || !js_scheduling_period) {
+		dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n"
+				"Use format <js_scheduling_period_ms>\n");
+		return -EINVAL;
+	}
+
+	new_scheduling_period_ns = js_scheduling_period * 1000000;
+
+	/* Update scheduling timeouts */
+	mutex_lock(&js_data->runpool_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* If no contexts have been scheduled since js_timeouts was last written
+	 * to, the new timeouts might not have been latched yet. So check if an
+	 * update is pending and use the new values if necessary. */
+
+	/* Use previous 'new' scheduling period as a base if present. */
+	old_period = js_data->scheduling_period_ns;
+
+#define SET_TIMEOUT(name) \
+		(js_data->name = get_new_js_timeout(\
+				old_period, \
+				kbdev->js_data.name, \
+				new_scheduling_period_ns))
+
+	SET_TIMEOUT(soft_stop_ticks);
+	SET_TIMEOUT(soft_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_ss);
+	SET_TIMEOUT(hard_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_dumping);
+	SET_TIMEOUT(gpu_reset_ticks_ss);
+	SET_TIMEOUT(gpu_reset_ticks_cl);
+	SET_TIMEOUT(gpu_reset_ticks_dumping);
+
+#undef SET_TIMEOUT
+
+	js_data->scheduling_period_ns = new_scheduling_period_ns;
+
+	kbase_js_set_timeouts(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&js_data->runpool_mutex);
+
+	dev_dbg(kbdev->dev, "JS scheduling period: %dms\n",
+			js_scheduling_period);
+
+	return count;
+}
+
+/**
+ * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs
+ *                             entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the JS scheduling
+ * period.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_js_scheduling_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	u32 period;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	period = kbdev->js_data.scheduling_period_ns;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n",
+			period / 1000000);
+
+	return ret;
+}
+
+static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR,
+		show_js_scheduling_period, set_js_scheduling_period);
+
+#if !MALI_CUSTOMER_RELEASE
+/**
+ * set_force_replay - Store callback for the force_replay sysfs file.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (!strncmp("limit=", buf, MIN(6, count))) {
+		int force_replay_limit;
+		int items = sscanf(buf, "limit=%u", &force_replay_limit);
+
+		if (items == 1) {
+			kbdev->force_replay_random = false;
+			kbdev->force_replay_limit = force_replay_limit;
+			kbdev->force_replay_count = 0;
+
+			return count;
+		}
+	} else if (!strncmp("random_limit", buf, MIN(12, count))) {
+		kbdev->force_replay_random = true;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("norandom_limit", buf, MIN(14, count))) {
+		kbdev->force_replay_random = false;
+		kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED;
+		kbdev->force_replay_count = 0;
+
+		return count;
+	} else if (!strncmp("core_req=", buf, MIN(9, count))) {
+		unsigned int core_req;
+		int items = sscanf(buf, "core_req=%x", &core_req);
+
+		if (items == 1) {
+			kbdev->force_replay_core_req = (base_jd_core_req)core_req;
+
+			return count;
+		}
+	}
+	dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=<limit>, random_limit, norandom_limit, core_req=<core_req>\n");
+	return -EINVAL;
+}
+
+/**
+ * show_force_replay - Show callback for the force_replay sysfs file.
+ *
+ * This function is called to get the contents of the force_replay sysfs
+ * file. It returns the last set value written to the force_replay sysfs file.
+ * If the file didn't get written yet, the values will be 0.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_force_replay(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->force_replay_random)
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=0\nrandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_core_req);
+	else
+		ret = scnprintf(buf, PAGE_SIZE,
+				"limit=%u\nnorandom_limit\ncore_req=%x\n",
+				kbdev->force_replay_limit,
+				kbdev->force_replay_core_req);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * The sysfs file force_replay.
+ */
+static DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay,
+		set_force_replay);
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+#ifdef CONFIG_MALI_DEBUG
+static ssize_t set_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int softstop_always;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &softstop_always);
+	if (ret || ((softstop_always != 0) && (softstop_always != 1))) {
+		dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n"
+				"Use format <soft_stop_always>\n");
+		return -EINVAL;
+	}
+
+	kbdev->js_data.softstop_always = (bool) softstop_always;
+	dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n",
+			(kbdev->js_data.softstop_always) ?
+			"Enabled" : "Disabled");
+	return count;
+}
+
+static ssize_t show_js_softstop_always(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/*
+ * By default, soft-stops are disabled when only a single context is present.
+ * The ability to enable soft-stop when only a single context is present can be
+ * used for debug and unit-testing purposes.
+ * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.)
+ */
+static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_DEBUG
+typedef void (kbasep_debug_command_func) (struct kbase_device *);
+
+enum kbasep_debug_command_code {
+	KBASEP_DEBUG_COMMAND_DUMPTRACE,
+
+	/* This must be the last enum */
+	KBASEP_DEBUG_COMMAND_COUNT
+};
+
+struct kbasep_debug_command {
+	char *str;
+	kbasep_debug_command_func *func;
+};
+
+/* Debug commands supported by the driver */
+static const struct kbasep_debug_command debug_commands[] = {
+	{
+	 .str = "dumptrace",
+	 .func = &kbasep_trace_dump,
+	 }
+};
+
+/**
+ * show_debug - Show callback for the debug_command sysfs file.
+ *
+ * This function is called to get the contents of the debug_command sysfs
+ * file. This is a list of the available debug commands, separated by newlines.
+ *
+ * @dev:	The device this sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++)
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/**
+ * issue_debug - Store callback for the debug_command sysfs file.
+ *
+ * This function is called when the debug_command sysfs file is written to.
+ * It matches the requested command against the available commands, and if
+ * a matching command is found calls the associated function from
+ * @debug_commands to issue the command.
+ *
+ * @dev:	The device with sysfs file is for
+ * @attr:	The attributes of the sysfs file
+ * @buf:	The value written to the sysfs file
+ * @count:	The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) {
+		if (sysfs_streq(debug_commands[i].str, buf)) {
+			debug_commands[i].func(kbdev);
+			return count;
+		}
+	}
+
+	/* Debug Command not found */
+	dev_err(dev, "debug_command: command not known\n");
+	return -EINVAL;
+}
+
+/* The sysfs file debug_command.
+ *
+ * This is used to issue general debug commands to the device driver.
+ * Reading it will produce a list of debug commands, separated by newlines.
+ * Writing to it with one of those commands will issue said command.
+ */
+static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug);
+#endif /* CONFIG_MALI_DEBUG */
+
+/**
+ * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the GPU information.
+ *
+ * This function is called to get a description of the present Mali
+ * GPU via the gpuinfo sysfs entry.  This includes the GPU family, the
+ * number of cores, the hardware version and the raw product id.  For
+ * example
+ *
+ *    Mali-T60x MP4 r0p0 0x6956
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t kbase_show_gpuinfo(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	static const struct gpu_product_id_name {
+		unsigned id;
+		char *name;
+	} gpu_product_id_names[] = {
+		{ .id = GPU_ID_PI_T60X, .name = "Mali-T60x" },
+		{ .id = GPU_ID_PI_T62X, .name = "Mali-T62x" },
+		{ .id = GPU_ID_PI_T72X, .name = "Mali-T72x" },
+		{ .id = GPU_ID_PI_T76X, .name = "Mali-T76x" },
+		{ .id = GPU_ID_PI_T82X, .name = "Mali-T82x" },
+		{ .id = GPU_ID_PI_T83X, .name = "Mali-T83x" },
+		{ .id = GPU_ID_PI_T86X, .name = "Mali-T86x" },
+		{ .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" },
+		{ .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G71" },
+		{ .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G72" },
+		{ .id = GPU_ID2_PRODUCT_TSIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G51" },
+		{ .id = GPU_ID2_PRODUCT_TNOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G76" },
+		{ .id = GPU_ID2_PRODUCT_TDVX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G31" },
+		{ .id = GPU_ID2_PRODUCT_TGOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G52" },
+	};
+	const char *product_name = "(Unknown Mali GPU)";
+	struct kbase_device *kbdev;
+	u32 gpu_id;
+	unsigned product_id, product_id_mask;
+	unsigned i;
+	bool is_new_format;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	is_new_format = GPU_ID_IS_NEW_FORMAT(product_id);
+	product_id_mask =
+		(is_new_format ?
+			GPU_ID2_PRODUCT_MODEL :
+			GPU_ID_VERSION_PRODUCT_ID) >>
+		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) {
+		const struct gpu_product_id_name *p = &gpu_product_id_names[i];
+
+		if ((GPU_ID_IS_NEW_FORMAT(p->id) == is_new_format) &&
+		    (p->id & product_id_mask) ==
+		    (product_id & product_id_mask)) {
+			product_name = p->name;
+			break;
+		}
+	}
+
+	return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n",
+		product_name, kbdev->gpu_props.num_cores,
+		(gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT,
+		(gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT,
+		product_id);
+}
+static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL);
+
+/**
+ * set_dvfs_period - Store callback for the dvfs_period sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the dvfs_period sysfs file is written to. It
+ * checks the data written, and if valid updates the DVFS period variable,
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_dvfs_period(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int dvfs_period;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &dvfs_period);
+	if (ret || dvfs_period <= 0) {
+		dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n"
+				"Use format <dvfs_period_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.dvfs_period = dvfs_period;
+	dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period);
+
+	return count;
+}
+
+/**
+ * show_dvfs_period - Show callback for the dvfs_period sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_dvfs_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period);
+
+	return ret;
+}
+
+static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period,
+		set_dvfs_period);
+
+/**
+ * set_pm_poweroff - Store callback for the pm_poweroff sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the pm_poweroff sysfs file is written to.
+ *
+ * This file contains three values separated by whitespace. The values
+ * are gpu_poweroff_time (the period of the poweroff timer, in ns),
+ * poweroff_shader_ticks (the number of poweroff timer ticks before an idle
+ * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer
+ * ticks before the GPU is powered off), in that order.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	s64 gpu_poweroff_time;
+	int poweroff_shader_ticks, poweroff_gpu_ticks;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time,
+			&poweroff_shader_ticks,
+			&poweroff_gpu_ticks);
+	if (items != 3) {
+		dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n"
+				"Use format <gpu_poweroff_time_ns> <poweroff_shader_ticks> <poweroff_gpu_ticks>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time);
+	kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks;
+	kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks;
+
+	return count;
+}
+
+/**
+ * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n",
+			ktime_to_ns(kbdev->pm.gpu_poweroff_time),
+			kbdev->pm.poweroff_shader_ticks,
+			kbdev->pm.poweroff_gpu_ticks);
+
+	return ret;
+}
+
+static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff,
+		set_pm_poweroff);
+
+/**
+ * set_reset_timeout - Store callback for the reset_timeout sysfs file.
+ * @dev:   The device with sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the reset_timeout sysfs file is written to. It
+ * checks the data written, and if valid updates the reset timeout.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_reset_timeout(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int ret;
+	int reset_timeout;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtoint(buf, 0, &reset_timeout);
+	if (ret || reset_timeout <= 0) {
+		dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n"
+				"Use format <reset_timeout_ms>\n");
+		return -EINVAL;
+	}
+
+	kbdev->reset_timeout_ms = reset_timeout;
+	dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout);
+
+	return count;
+}
+
+/**
+ * show_reset_timeout - Show callback for the reset_timeout sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current reset timeout.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_reset_timeout(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms);
+
+	return ret;
+}
+
+static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout,
+		set_reset_timeout);
+
+
+
+static ssize_t show_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_size);
+	if (err)
+		return err;
+
+	kbase_mem_pool_trim(&kbdev->mem_pool, new_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size,
+		set_mem_pool_size);
+
+static ssize_t show_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
+			kbase_mem_pool_max_size(&kbdev->mem_pool));
+
+	return ret;
+}
+
+static ssize_t set_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	size_t new_max_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, (unsigned long *)&new_max_size);
+	if (err)
+		return -EINVAL;
+
+	kbase_mem_pool_set_max_size(&kbdev->mem_pool, new_max_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size,
+		set_mem_pool_max_size);
+
+/**
+ * show_lp_mem_pool_size - Show size of the large memory pages pool.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the pool size.
+ *
+ * This function is called to get the number of large memory pages which currently populate the kbdev pool.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_lp_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%zu\n", kbase_mem_pool_size(&kbdev->lp_mem_pool));
+}
+
+/**
+ * set_lp_mem_pool_size - Set size of the large memory pages pool.
+ * @dev:   The device this sysfs file is for.
+ * @attr:  The attributes of the sysfs file.
+ * @buf:   The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This function is called to set the number of large memory pages which should populate the kbdev pool.
+ * This may cause existing pages to be removed from the pool, or new pages to be created and then added to the pool.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_lp_mem_pool_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	unsigned long new_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, &new_size);
+	if (err)
+		return err;
+
+	kbase_mem_pool_trim(&kbdev->lp_mem_pool, new_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(lp_mem_pool_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_size,
+		set_lp_mem_pool_size);
+
+/**
+ * show_lp_mem_pool_max_size - Show maximum size of the large memory pages pool.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the pool size.
+ *
+ * This function is called to get the maximum number of large memory pages that the kbdev pool can possibly contain.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_lp_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%zu\n", kbase_mem_pool_max_size(&kbdev->lp_mem_pool));
+}
+
+/**
+ * set_lp_mem_pool_max_size - Set maximum size of the large memory pages pool.
+ * @dev:   The device this sysfs file is for.
+ * @attr:  The attributes of the sysfs file.
+ * @buf:   The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This function is called to set the maximum number of large memory pages that the kbdev pool can possibly contain.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_lp_mem_pool_max_size(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	unsigned long new_max_size;
+	int err;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	err = kstrtoul(buf, 0, &new_max_size);
+	if (err)
+		return -EINVAL;
+
+	kbase_mem_pool_set_max_size(&kbdev->lp_mem_pool, new_max_size);
+
+	return count;
+}
+
+static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max_size,
+		set_lp_mem_pool_max_size);
+
+/**
+ * show_js_ctx_scheduling_mode - Show callback for js_ctx_scheduling_mode sysfs
+ *                               entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer to receive the context scheduling mode information.
+ *
+ * This function is called to get the context scheduling mode being used by JS.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_js_ctx_scheduling_mode(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->js_ctx_scheduling_mode);
+}
+
+/**
+ * set_js_ctx_scheduling_mode - Set callback for js_ctx_scheduling_mode sysfs
+ *                              entry.
+ * @dev:   The device this sysfs file is for.
+ * @attr:  The attributes of the sysfs file.
+ * @buf:   The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This function is called when the js_ctx_scheduling_mode sysfs file is written
+ * to. It checks the data written, and if valid updates the ctx scheduling mode
+ * being by JS.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_ctx_scheduling_mode(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbasep_kctx_list_element *element;
+	u32 new_js_ctx_scheduling_mode;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	int ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = kstrtouint(buf, 0, &new_js_ctx_scheduling_mode);
+	if (ret || new_js_ctx_scheduling_mode >= KBASE_JS_PRIORITY_MODE_COUNT) {
+		dev_err(kbdev->dev, "Couldn't process js_ctx_scheduling_mode"
+				" write operation.\n"
+				"Use format <js_ctx_scheduling_mode>\n");
+		return -EINVAL;
+	}
+
+	if (new_js_ctx_scheduling_mode == kbdev->js_ctx_scheduling_mode)
+		return count;
+
+	mutex_lock(&kbdev->kctx_list_lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Update the context priority mode */
+	kbdev->js_ctx_scheduling_mode = new_js_ctx_scheduling_mode;
+
+	/* Adjust priority of all the contexts as per the new mode */
+	list_for_each_entry(element, &kbdev->kctx_list, link)
+		kbase_js_update_ctx_priority(element->kctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	dev_dbg(kbdev->dev, "JS ctx scheduling mode: %u\n", new_js_ctx_scheduling_mode);
+
+	return count;
+}
+
+static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR,
+		show_js_ctx_scheduling_mode,
+		set_js_ctx_scheduling_mode);
+#ifdef CONFIG_DEBUG_FS
+
+/* Number of entries in serialize_jobs_settings[] */
+#define NR_SERIALIZE_JOBS_SETTINGS 5
+/* Maximum string length in serialize_jobs_settings[].name */
+#define MAX_SERIALIZE_JOBS_NAME_LEN 16
+
+static struct
+{
+	char *name;
+	u8 setting;
+} serialize_jobs_settings[NR_SERIALIZE_JOBS_SETTINGS] = {
+	{"none", 0},
+	{"intra-slot", KBASE_SERIALIZE_INTRA_SLOT},
+	{"inter-slot", KBASE_SERIALIZE_INTER_SLOT},
+	{"full", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT},
+	{"full-reset", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT |
+			KBASE_SERIALIZE_RESET}
+};
+
+/**
+ * kbasep_serialize_jobs_seq_show - Show callback for the serialize_jobs debugfs
+ *                                  file
+ * @sfile: seq_file pointer
+ * @data:  Private callback data
+ *
+ * This function is called to get the contents of the serialize_jobs debugfs
+ * file. This is a list of the available settings with the currently active one
+ * surrounded by square brackets.
+ *
+ * Return: 0 on success, or an error code on error
+ */
+static int kbasep_serialize_jobs_seq_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_device *kbdev = sfile->private;
+	int i;
+
+	CSTD_UNUSED(data);
+
+	for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) {
+		if (kbdev->serialize_jobs == serialize_jobs_settings[i].setting)
+			seq_printf(sfile, "[%s] ",
+					serialize_jobs_settings[i].name);
+		else
+			seq_printf(sfile, "%s ",
+					serialize_jobs_settings[i].name);
+	}
+
+	seq_puts(sfile, "\n");
+
+	return 0;
+}
+
+/**
+ * kbasep_serialize_jobs_debugfs_write - Store callback for the serialize_jobs
+ *                                       debugfs file.
+ * @file:  File pointer
+ * @ubuf:  User buffer containing data to store
+ * @count: Number of bytes in user buffer
+ * @ppos:  File position
+ *
+ * This function is called when the serialize_jobs debugfs file is written to.
+ * It matches the requested setting against the available settings and if a
+ * matching setting is found updates kbdev->serialize_jobs.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file,
+		const char __user *ubuf, size_t count, loff_t *ppos)
+{
+	struct seq_file *s = file->private_data;
+	struct kbase_device *kbdev = s->private;
+	char buf[MAX_SERIALIZE_JOBS_NAME_LEN];
+	int i;
+	bool valid = false;
+
+	CSTD_UNUSED(ppos);
+
+	count = min_t(size_t, sizeof(buf) - 1, count);
+	if (copy_from_user(buf, ubuf, count))
+		return -EFAULT;
+
+	buf[count] = 0;
+
+	for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) {
+		if (sysfs_streq(serialize_jobs_settings[i].name, buf)) {
+			kbdev->serialize_jobs =
+					serialize_jobs_settings[i].setting;
+			valid = true;
+			break;
+		}
+	}
+
+	if (!valid) {
+		dev_err(kbdev->dev, "serialize_jobs: invalid setting\n");
+		return -EINVAL;
+	}
+
+	return count;
+}
+
+/**
+ * kbasep_serialize_jobs_debugfs_open - Open callback for the serialize_jobs
+ *                                     debugfs file
+ * @in:   inode pointer
+ * @file: file pointer
+ *
+ * Return: Zero on success, error code on failure
+ */
+static int kbasep_serialize_jobs_debugfs_open(struct inode *in,
+		struct file *file)
+{
+	return single_open(file, kbasep_serialize_jobs_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_serialize_jobs_debugfs_fops = {
+	.open = kbasep_serialize_jobs_debugfs_open,
+	.read = seq_read,
+	.write = kbasep_serialize_jobs_debugfs_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+#endif /* CONFIG_DEBUG_FS */
+
+static int kbasep_protected_mode_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_OF
+	struct device_node *protected_node;
+	struct platform_device *pdev;
+	struct protected_mode_device *protected_dev;
+#endif
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		/* Use native protected ops */
+		kbdev->protected_dev = kzalloc(sizeof(*kbdev->protected_dev),
+				GFP_KERNEL);
+		if (!kbdev->protected_dev)
+			return -ENOMEM;
+		kbdev->protected_dev->data = kbdev;
+		kbdev->protected_ops = &kbase_native_protected_ops;
+		kbdev->protected_mode_support = true;
+		return 0;
+	}
+
+	kbdev->protected_mode_support = false;
+
+#ifdef CONFIG_OF
+	protected_node = of_parse_phandle(kbdev->dev->of_node,
+			"protected-mode-switcher", 0);
+
+	if (!protected_node)
+		protected_node = of_parse_phandle(kbdev->dev->of_node,
+				"secure-mode-switcher", 0);
+
+	if (!protected_node) {
+		/* If protected_node cannot be looked up then we assume
+		 * protected mode is not supported on this platform. */
+		dev_info(kbdev->dev, "Protected mode not available\n");
+		return 0;
+	}
+
+	pdev = of_find_device_by_node(protected_node);
+	if (!pdev)
+		return -EINVAL;
+
+	protected_dev = platform_get_drvdata(pdev);
+	if (!protected_dev)
+		return -EPROBE_DEFER;
+
+	kbdev->protected_ops = &protected_dev->ops;
+	kbdev->protected_dev = protected_dev;
+
+	if (kbdev->protected_ops) {
+		int err;
+
+		/* Make sure protected mode is disabled on startup */
+		mutex_lock(&kbdev->pm.lock);
+		err = kbdev->protected_ops->protected_mode_disable(
+				kbdev->protected_dev);
+		mutex_unlock(&kbdev->pm.lock);
+
+		/* protected_mode_disable() returns -EINVAL if not supported */
+		kbdev->protected_mode_support = 0; //mnk
+	}
+#endif
+	return 0;
+}
+
+static void kbasep_protected_mode_term(struct kbase_device *kbdev)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
+		kfree(kbdev->protected_dev);
+}
+
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	return 0;
+}
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+}
+#else /* CONFIG_MALI_NO_MALI */
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+	int err = 0;
+
+	if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) {
+		dev_err(kbdev->dev, "Register window unavailable\n");
+		err = -EIO;
+		goto out_region;
+	}
+
+	kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size);
+	if (!kbdev->reg) {
+		dev_err(kbdev->dev, "Can't remap register window\n");
+		err = -EINVAL;
+		goto out_ioremap;
+	}
+
+	return err;
+
+ out_ioremap:
+	release_mem_region(kbdev->reg_start, kbdev->reg_size);
+ out_region:
+	return err;
+}
+
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+	if (kbdev->reg) {
+		iounmap(kbdev->reg);
+		release_mem_region(kbdev->reg_start, kbdev->reg_size);
+		kbdev->reg = NULL;
+		kbdev->reg_start = 0;
+		kbdev->reg_size = 0;
+	}
+}
+#endif /* CONFIG_MALI_NO_MALI */
+
+static int registers_map(struct kbase_device * const kbdev)
+{
+
+		/* the first memory resource is the physical address of the GPU
+		 * registers */
+		struct platform_device *pdev = to_platform_device(kbdev->dev);
+		struct resource *reg_res;
+		int err;
+
+		reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!reg_res) {
+			dev_err(kbdev->dev, "Invalid register resource\n");
+			return -ENOENT;
+		}
+
+		kbdev->reg_start = reg_res->start;
+		kbdev->reg_size = resource_size(reg_res);
+
+
+		err = kbase_common_reg_map(kbdev);
+		if (err) {
+			dev_err(kbdev->dev, "Failed to map registers\n");
+			return err;
+		}
+
+	return 0;
+}
+
+static void registers_unmap(struct kbase_device *kbdev)
+{
+	kbase_common_reg_unmap(kbdev);
+}
+
+static int power_control_init(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int err = 0;
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	kbdev->regulator = regulator_get_optional(kbdev->dev, "mali");
+	if (IS_ERR_OR_NULL(kbdev->regulator)) {
+		err = PTR_ERR(kbdev->regulator);
+		kbdev->regulator = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get regulator\n");
+			return err;
+		}
+		dev_info(kbdev->dev,
+			"Continuing without Mali regulator control\n");
+		/* Allow probe to continue without regulator */
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+
+	kbdev->clock = of_clk_get(kbdev->dev->of_node, 0);
+	if (IS_ERR_OR_NULL(kbdev->clock)) {
+		err = PTR_ERR(kbdev->clock);
+		kbdev->clock = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get clock\n");
+			goto fail;
+		}
+		dev_info(kbdev->dev, "Continuing without Mali clock control\n");
+		/* Allow probe to continue without clock. */
+	} else {
+		err = clk_prepare_enable(kbdev->clock);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Failed to prepare and enable clock (%d)\n",
+				err);
+			goto fail;
+		}
+	}
+
+#if defined(CONFIG_OF) && defined(CONFIG_PM_OPP)
+	/* Register the OPPs if they are available in device tree */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) \
+	|| defined(LSK_OPPV2_BACKPORT)
+	err = dev_pm_opp_of_add_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	err = of_init_opp_table(kbdev->dev);
+#else
+	err = 0;
+#endif /* LINUX_VERSION_CODE */
+	if (err)
+		dev_dbg(kbdev->dev, "OPP table not found\n");
+#endif /* CONFIG_OF && CONFIG_PM_OPP */
+
+	return 0;
+
+fail:
+
+if (kbdev->clock != NULL) {
+	clk_put(kbdev->clock);
+	kbdev->clock = NULL;
+}
+
+#ifdef CONFIG_REGULATOR
+	if (NULL != kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif
+
+	return err;
+}
+
+static void power_control_term(struct kbase_device *kbdev)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) || \
+		defined(LSK_OPPV2_BACKPORT)
+	dev_pm_opp_of_remove_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+	of_free_opp_table(kbdev->dev);
+#endif
+
+	if (kbdev->clock) {
+		clk_disable_unprepare(kbdev->clock);
+		clk_put(kbdev->clock);
+		kbdev->clock = NULL;
+	}
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	if (kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+}
+
+#ifdef MALI_KBASE_BUILD
+#ifdef CONFIG_DEBUG_FS
+
+#if KBASE_GPU_RESET_EN
+#include <mali_kbase_hwaccess_jm.h>
+
+static void trigger_quirks_reload(struct kbase_device *kbdev)
+{
+	kbase_pm_context_active(kbdev);
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	kbase_pm_context_idle(kbdev);
+}
+
+#define MAKE_QUIRK_ACCESSORS(type) \
+static int type##_quirks_set(void *data, u64 val) \
+{ \
+	struct kbase_device *kbdev; \
+	kbdev = (struct kbase_device *)data; \
+	kbdev->hw_quirks_##type = (u32)val; \
+	trigger_quirks_reload(kbdev); \
+	return 0;\
+} \
+\
+static int type##_quirks_get(void *data, u64 *val) \
+{ \
+	struct kbase_device *kbdev;\
+	kbdev = (struct kbase_device *)data;\
+	*val = kbdev->hw_quirks_##type;\
+	return 0;\
+} \
+DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\
+		type##_quirks_set, "%llu\n")
+
+MAKE_QUIRK_ACCESSORS(sc);
+MAKE_QUIRK_ACCESSORS(tiler);
+MAKE_QUIRK_ACCESSORS(mmu);
+MAKE_QUIRK_ACCESSORS(jm);
+
+#endif /* KBASE_GPU_RESET_EN */
+
+/**
+ * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read
+ * @file: File object to read is for
+ * @buf:  User buffer to populate with data
+ * @len:  Length of user buffer
+ * @ppos: Offset within file object
+ *
+ * Retrieves the current status of protected debug mode
+ * (0 = disabled, 1 = enabled)
+ *
+ * Return: Number of bytes added to user buffer
+ */
+static ssize_t debugfs_protected_debug_mode_read(struct file *file,
+				char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)file->private_data;
+	u32 gpu_status;
+	ssize_t ret_val;
+
+	kbase_pm_context_active(kbdev);
+	gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS));
+	kbase_pm_context_idle(kbdev);
+
+	if (gpu_status & GPU_DBGEN)
+		ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2);
+	else
+		ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2);
+
+	return ret_val;
+}
+
+/*
+ * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops
+ *
+ * Contains the file operations for the "protected_debug_mode" debugfs file
+ */
+static const struct file_operations fops_protected_debug_mode = {
+	.open = simple_open,
+	.read = debugfs_protected_debug_mode_read,
+	.llseek = default_llseek,
+};
+
+static int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	struct dentry *debugfs_ctx_defaults_directory;
+	int err;
+
+	kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
+			NULL);
+	if (!kbdev->mali_debugfs_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx",
+			kbdev->mali_debugfs_directory);
+	if (!kbdev->debugfs_ctx_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	debugfs_ctx_defaults_directory = debugfs_create_dir("defaults",
+			kbdev->debugfs_ctx_directory);
+	if (!debugfs_ctx_defaults_directory) {
+		dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+#if !MALI_CUSTOMER_RELEASE
+	kbasep_regs_dump_debugfs_init(kbdev);
+#endif /* !MALI_CUSTOMER_RELEASE */
+	kbasep_regs_history_debugfs_init(kbdev);
+
+	kbase_debug_job_fault_debugfs_init(kbdev);
+	kbasep_gpu_memory_debugfs_init(kbdev);
+	kbase_as_fault_debugfs_init(kbdev);
+#if KBASE_GPU_RESET_EN
+	/* fops_* variables created by invocations of macro
+	 * MAKE_QUIRK_ACCESSORS() above. */
+	debugfs_create_file("quirks_sc", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_sc_quirks);
+	debugfs_create_file("quirks_tiler", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_tiler_quirks);
+	debugfs_create_file("quirks_mmu", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_mmu_quirks);
+	debugfs_create_file("quirks_jm", 0644,
+			kbdev->mali_debugfs_directory, kbdev,
+			&fops_jm_quirks);
+#endif /* KBASE_GPU_RESET_EN */
+
+	debugfs_create_bool("infinite_cache", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->infinite_cache_active_default);
+
+	debugfs_create_size_t("mem_pool_max_size", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->mem_pool_max_size_default);
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
+		debugfs_create_file("protected_debug_mode", S_IRUGO,
+				kbdev->mali_debugfs_directory, kbdev,
+				&fops_protected_debug_mode);
+	}
+
+#if KBASE_TRACE_ENABLE
+	kbasep_trace_debugfs_init(kbdev);
+#endif /* KBASE_TRACE_ENABLE */
+
+#ifdef CONFIG_MALI_DEVFREQ
+#ifdef CONFIG_DEVFREQ_THERMAL
+	if (kbdev->inited_subsys & inited_devfreq)
+		kbase_ipa_debugfs_init(kbdev);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* CONFIG_MALI_DEVFREQ */
+
+#ifdef CONFIG_DEBUG_FS
+	debugfs_create_file("serialize_jobs", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_serialize_jobs_debugfs_fops);
+#endif /* CONFIG_DEBUG_FS */
+
+	return 0;
+
+out:
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+	return err;
+}
+
+static void kbase_device_debugfs_term(struct kbase_device *kbdev)
+{
+	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+}
+
+#else /* CONFIG_DEBUG_FS */
+static inline int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+	return 0;
+}
+
+static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { }
+#endif /* CONFIG_DEBUG_FS */
+#endif /* MALI_KBASE_BUILD */
+
+static void kbase_device_coherency_init(struct kbase_device *kbdev,
+		unsigned prod_id)
+{
+#ifdef CONFIG_OF
+	u32 supported_coherency_bitmap =
+		kbdev->gpu_props.props.raw_props.coherency_mode;
+	const void *coherency_override_dts;
+	u32 override_coherency;
+
+	/* Only for tMIx :
+	 * (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly
+	 * documented for tMIx so force correct value here.
+	 */
+	if (GPU_ID_IS_NEW_FORMAT(prod_id) &&
+		   (GPU_ID2_MODEL_MATCH_VALUE(prod_id) ==
+				   GPU_ID2_PRODUCT_TMIX))
+		if (supported_coherency_bitmap ==
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE))
+			supported_coherency_bitmap |=
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+
+#endif /* CONFIG_OF */
+
+	kbdev->system_coherency = COHERENCY_NONE;
+
+	/* device tree may override the coherency */
+#ifdef CONFIG_OF
+	coherency_override_dts = of_get_property(kbdev->dev->of_node,
+						"system-coherency",
+						NULL);
+	if (coherency_override_dts) {
+
+		override_coherency = be32_to_cpup(coherency_override_dts);
+
+		if ((override_coherency <= COHERENCY_NONE) &&
+			(supported_coherency_bitmap &
+			 COHERENCY_FEATURE_BIT(override_coherency))) {
+
+			kbdev->system_coherency = override_coherency;
+
+			dev_info(kbdev->dev,
+				"Using coherency mode %u set from dtb",
+				override_coherency);
+		} else
+			dev_warn(kbdev->dev,
+				"Ignoring unsupported coherency mode %u set from dtb",
+				override_coherency);
+	}
+
+#endif /* CONFIG_OF */
+
+	kbdev->gpu_props.props.raw_props.coherency_mode =
+		kbdev->system_coherency;
+}
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+
+/* Callback used by the kbase bus logger client, to initiate a GPU reset
+ * when the bus log is restarted.  GPU reset is used as reference point
+ * in HW bus log analyses.
+ */
+static void kbase_logging_started_cb(void *data)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)data;
+
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+	dev_info(kbdev->dev, "KBASE - Bus logger restarted\n");
+}
+#endif
+
+static struct attribute *kbase_attrs[] = {
+#ifdef CONFIG_MALI_DEBUG
+	&dev_attr_debug_command.attr,
+	&dev_attr_js_softstop_always.attr,
+#endif
+#if !MALI_CUSTOMER_RELEASE
+	&dev_attr_force_replay.attr,
+#endif
+	&dev_attr_js_timeouts.attr,
+	&dev_attr_soft_job_timeout.attr,
+	&dev_attr_gpuinfo.attr,
+	&dev_attr_dvfs_period.attr,
+	&dev_attr_pm_poweroff.attr,
+	&dev_attr_reset_timeout.attr,
+	&dev_attr_js_scheduling_period.attr,
+	&dev_attr_power_policy.attr,
+	&dev_attr_core_mask.attr,
+	&dev_attr_mem_pool_size.attr,
+	&dev_attr_mem_pool_max_size.attr,
+	&dev_attr_lp_mem_pool_size.attr,
+	&dev_attr_lp_mem_pool_max_size.attr,
+	&dev_attr_js_ctx_scheduling_mode.attr,
+	NULL
+};
+
+static const struct attribute_group kbase_attr_group = {
+	.attrs = kbase_attrs,
+};
+
+static int kbase_platform_device_remove(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	const struct list_head *dev_list;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kfree(kbdev->gpu_props.prop_buffer);
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	if (kbdev->inited_subsys & inited_buslogger) {
+		bl_core_client_unregister(kbdev->buslogger);
+		kbdev->inited_subsys &= ~inited_buslogger;
+	}
+#endif
+
+
+	if (kbdev->inited_subsys & inited_dev_list) {
+		dev_list = kbase_dev_list_get();
+		list_del(&kbdev->entry);
+		kbase_dev_list_put(dev_list);
+		kbdev->inited_subsys &= ~inited_dev_list;
+	}
+
+	if (kbdev->inited_subsys & inited_misc_register) {
+		misc_deregister(&kbdev->mdev);
+		kbdev->inited_subsys &= ~inited_misc_register;
+	}
+
+	if (kbdev->inited_subsys & inited_sysfs_group) {
+		sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
+		kbdev->inited_subsys &= ~inited_sysfs_group;
+	}
+
+	if (kbdev->inited_subsys & inited_get_device) {
+		put_device(kbdev->dev);
+		kbdev->inited_subsys &= ~inited_get_device;
+	}
+
+#ifdef MALI_KBASE_BUILD
+	if (kbdev->inited_subsys & inited_debugfs) {
+		kbase_device_debugfs_term(kbdev);
+		kbdev->inited_subsys &= ~inited_debugfs;
+	}
+#endif
+
+	if (kbdev->inited_subsys & inited_job_fault) {
+		kbase_debug_job_fault_dev_term(kbdev);
+		kbdev->inited_subsys &= ~inited_job_fault;
+	}
+
+#ifdef CONFIG_MALI_DEVFREQ
+	if (kbdev->inited_subsys & inited_devfreq) {
+		kbase_devfreq_term(kbdev);
+		kbdev->inited_subsys &= ~inited_devfreq;
+	}
+#endif
+
+
+	if (kbdev->inited_subsys & inited_vinstr) {
+		kbase_vinstr_term(kbdev->vinstr_ctx);
+		kbdev->inited_subsys &= ~inited_vinstr;
+	}
+
+	if (kbdev->inited_subsys & inited_backend_late) {
+		kbase_backend_late_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_late;
+	}
+
+	if (kbdev->inited_subsys & inited_tlstream) {
+		kbase_tlstream_term();
+		kbdev->inited_subsys &= ~inited_tlstream;
+	}
+
+	/* Bring job and mem sys to a halt before we continue termination */
+
+	if (kbdev->inited_subsys & inited_js)
+		kbasep_js_devdata_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_mem)
+		kbase_mem_halt(kbdev);
+
+	if (kbdev->inited_subsys & inited_protected) {
+		kbasep_protected_mode_term(kbdev);
+		kbdev->inited_subsys &= ~inited_protected;
+	}
+
+	if (kbdev->inited_subsys & inited_js) {
+		kbasep_js_devdata_term(kbdev);
+		kbdev->inited_subsys &= ~inited_js;
+	}
+
+	if (kbdev->inited_subsys & inited_mem) {
+		kbase_mem_term(kbdev);
+		kbdev->inited_subsys &= ~inited_mem;
+	}
+
+	if (kbdev->inited_subsys & inited_ctx_sched) {
+		kbase_ctx_sched_term(kbdev);
+		kbdev->inited_subsys &= ~inited_ctx_sched;
+	}
+
+	if (kbdev->inited_subsys & inited_device) {
+		kbase_device_term(kbdev);
+		kbdev->inited_subsys &= ~inited_device;
+	}
+
+	if (kbdev->inited_subsys & inited_backend_early) {
+		kbase_backend_early_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_early;
+	}
+
+	if (kbdev->inited_subsys & inited_io_history) {
+		kbase_io_history_term(&kbdev->io_history);
+		kbdev->inited_subsys &= ~inited_io_history;
+	}
+
+	if (kbdev->inited_subsys & inited_power_control) {
+		power_control_term(kbdev);
+		kbdev->inited_subsys &= ~inited_power_control;
+	}
+
+	if (kbdev->inited_subsys & inited_registers_map) {
+		registers_unmap(kbdev);
+		kbdev->inited_subsys &= ~inited_registers_map;
+	}
+
+#ifdef CONFIG_MALI_NO_MALI
+	if (kbdev->inited_subsys & inited_gpu_device) {
+		gpu_device_destroy(kbdev);
+		kbdev->inited_subsys &= ~inited_gpu_device;
+	}
+#endif /* CONFIG_MALI_NO_MALI */
+
+	if (kbdev->inited_subsys != 0)
+		dev_err(kbdev->dev, "Missing sub system termination\n");
+
+	kbase_device_free(kbdev);
+
+	return 0;
+}
+
+
+/* Number of register accesses for the buffer that we allocate during
+ * initialization time. The buffer size can be changed later via debugfs. */
+#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512)
+
+static int kbase_platform_device_probe(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev;
+	struct mali_base_gpu_core_props *core_props;
+	u32 gpu_id;
+	unsigned prod_id;
+	const struct list_head *dev_list;
+	int err = 0;
+
+	kbdev = kbase_device_alloc();
+	if (!kbdev) {
+		dev_err(&pdev->dev, "Allocate device failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+
+	kbdev->dev = &pdev->dev;
+	dev_set_drvdata(kbdev->dev, kbdev);
+
+#ifdef CONFIG_MALI_NO_MALI
+	err = gpu_device_create(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Dummy model initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_gpu_device;
+#endif /* CONFIG_MALI_NO_MALI */
+
+	err = assign_irqs(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "IRQ search failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+
+	err = registers_map(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Register map failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_registers_map;
+
+	err = power_control_init(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Power control initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_power_control;
+
+	err = kbase_io_history_init(&kbdev->io_history,
+			KBASEP_DEFAULT_REGISTER_HISTORY_SIZE);
+	if (err) {
+		dev_err(&pdev->dev, "Register access history initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
+	}
+	kbdev->inited_subsys |= inited_io_history;
+
+	err = kbase_backend_early_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Early backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_early;
+
+	scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
+			kbase_dev_nr);
+
+	kbase_disjoint_init(kbdev);
+
+	/* obtain max configured gpu frequency, if devfreq is enabled then
+	 * this will be overridden by the highest operating point found
+	 */
+	core_props = &(kbdev->gpu_props.props.core_props);
+#ifdef GPU_FREQ_KHZ_MAX
+	core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+#else
+	core_props->gpu_freq_khz_max = DEFAULT_GPU_FREQ_KHZ_MAX;
+#endif
+
+	err = kbase_device_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Device initialization failed (%d)\n", err);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_device;
+
+	err = kbase_ctx_sched_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Context scheduler initialization failed (%d)\n",
+				err);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_ctx_sched;
+
+	err = kbase_mem_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Memory subsystem initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_mem;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
+	prod_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	kbase_device_coherency_init(kbdev, prod_id);
+
+	err = kbasep_protected_mode_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Protected mode subsystem initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_protected;
+
+	dev_list = kbase_dev_list_get();
+	list_add(&kbdev->entry, &kbase_dev_list);
+	kbase_dev_list_put(dev_list);
+	kbdev->inited_subsys |= inited_dev_list;
+
+	err = kbasep_js_devdata_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job JS devdata initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_js;
+
+	err = kbase_tlstream_init();
+	if (err) {
+		dev_err(kbdev->dev, "Timeline stream initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_tlstream;
+
+	err = kbase_backend_late_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Late backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_late;
+
+	/* Initialize the kctx list. This is used by vinstr. */
+	mutex_init(&kbdev->kctx_list_lock);
+	INIT_LIST_HEAD(&kbdev->kctx_list);
+
+	kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
+	if (!kbdev->vinstr_ctx) {
+		dev_err(kbdev->dev,
+			"Virtual instrumentation initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
+	kbdev->inited_subsys |= inited_vinstr;
+
+
+#ifdef CONFIG_MALI_DEVFREQ
+	/* Devfreq uses vinstr, so must be initialized after it. */
+	err = kbase_devfreq_init(kbdev);
+	if (!err)
+		kbdev->inited_subsys |= inited_devfreq;
+	else
+		dev_err(kbdev->dev, "Continuing without devfreq\n");
+#endif /* CONFIG_MALI_DEVFREQ */
+
+#ifdef MALI_KBASE_BUILD
+	err = kbase_debug_job_fault_dev_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job fault debug initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_job_fault;
+
+	err = kbase_device_debugfs_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "DebugFS initialization failed");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_debugfs;
+
+	kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
+	kbdev->mdev.name = kbdev->devname;
+	kbdev->mdev.fops = &kbase_fops;
+	kbdev->mdev.parent = get_device(kbdev->dev);
+	kbdev->mdev.mode = 0666;
+	kbdev->inited_subsys |= inited_get_device;
+
+	/* This needs to happen before registering the device with misc_register(),
+	 * otherwise it causes a race condition between registering the device and a
+	 * uevent event being generated for userspace, causing udev rules to run
+	 * which might expect certain sysfs attributes present. As a result of the
+	 * race condition we avoid, some Mali sysfs entries may have appeared to
+	 * udev to not exist.
+
+	 * For more information, see
+	 * https://www.kernel.org/doc/Documentation/driver-model/device.txt, the
+	 * paragraph that starts with "Word of warning", currently the second-last
+	 * paragraph.
+	 */
+	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group);
+	if (err) {
+		dev_err(&pdev->dev, "SysFS group creation failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_sysfs_group;
+
+	err = misc_register(&kbdev->mdev);
+	if (err) {
+		dev_err(kbdev->dev, "Misc device registration failed for %s\n",
+			kbdev->devname);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_misc_register;
+
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	err = bl_core_client_register(kbdev->devname,
+						kbase_logging_started_cb,
+						kbdev, &kbdev->buslogger,
+						THIS_MODULE, NULL);
+	if (err == 0) {
+		kbdev->inited_subsys |= inited_buslogger;
+		bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024);
+	} else {
+		dev_warn(kbdev->dev, "Bus log client registration failed\n");
+		err = 0;
+	}
+#endif
+
+	err = kbase_gpuprops_populate_user_buffer(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "GPU property population failed");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+  dev_info(kbdev->dev, "GPU freq is %lu \n", clk_get_rate(kbdev->clock) );
+	dev_info(kbdev->dev,
+			"Probed as %s\n", dev_name(kbdev->mdev.this_device));
+
+	kbase_dev_nr++;
+#endif /* MALI_KBASE_BUILD */
+
+	return err;
+}
+
+#undef KBASEP_DEFAULT_REGISTER_HISTORY_SIZE
+
+/**
+ * kbase_device_suspend - Suspend callback from the OS.
+ *
+ * This is called by Linux when the device should suspend.
+ *
+ * @dev:  The device to suspend
+ *
+ * Return: A standard Linux error code
+ */
+static int kbase_device_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (kbdev->inited_subsys & inited_devfreq)
+		devfreq_suspend_device(kbdev->devfreq);
+#endif
+
+	kbase_pm_suspend(kbdev);
+	return 0;
+}
+
+/**
+ * kbase_device_resume - Resume callback from the OS.
+ *
+ * This is called by Linux when the device should resume from suspension.
+ *
+ * @dev:  The device to resume
+ *
+ * Return: A standard Linux error code
+ */
+static int kbase_device_resume(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kbase_pm_resume(kbdev);
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (kbdev->inited_subsys & inited_devfreq)
+		devfreq_resume_device(kbdev->devfreq);
+#endif
+	return 0;
+}
+
+/**
+ * kbase_device_runtime_suspend - Runtime suspend callback from the OS.
+ *
+ * This is called by Linux when the device should prepare for a condition in
+ * which it will not be able to communicate with the CPU(s) and RAM due to
+ * power management.
+ *
+ * @dev:  The device to suspend
+ *
+ * Return: A standard Linux error code
+ */
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_suspend(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (kbdev->inited_subsys & inited_devfreq)
+		devfreq_suspend_device(kbdev->devfreq);
+#endif
+
+	if (kbdev->pm.backend.callback_power_runtime_off) {
+		kbdev->pm.backend.callback_power_runtime_off(kbdev);
+		dev_dbg(dev, "runtime suspend\n");
+	}
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+/**
+ * kbase_device_runtime_resume - Runtime resume callback from the OS.
+ *
+ * This is called by Linux when the device should go into a fully active state.
+ *
+ * @dev:  The device to suspend
+ *
+ * Return: A standard Linux error code
+ */
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_resume(struct device *dev)
+{
+	int ret = 0;
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->pm.backend.callback_power_runtime_on) {
+		ret = kbdev->pm.backend.callback_power_runtime_on(kbdev);
+		dev_dbg(dev, "runtime resume\n");
+	}
+
+#if defined(CONFIG_MALI_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	if (kbdev->inited_subsys & inited_devfreq)
+		devfreq_resume_device(kbdev->devfreq);
+#endif
+
+	return ret;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+
+#ifdef KBASE_PM_RUNTIME
+/**
+ * kbase_device_runtime_idle - Runtime idle callback from the OS.
+ * @dev: The device to suspend
+ *
+ * This is called by Linux when the device appears to be inactive and it might
+ * be placed into a low power state.
+ *
+ * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend,
+ * otherwise a standard Linux error code
+ */
+static int kbase_device_runtime_idle(struct device *dev)
+{
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* Use platform specific implementation if it exists. */
+	if (kbdev->pm.backend.callback_power_runtime_idle)
+		return kbdev->pm.backend.callback_power_runtime_idle(kbdev);
+
+	return 0;
+}
+#endif /* KBASE_PM_RUNTIME */
+
+/* The power management operations for the platform driver.
+ */
+static const struct dev_pm_ops kbase_pm_ops = {
+	.suspend = kbase_device_suspend,
+	.resume = kbase_device_resume,
+#ifdef KBASE_PM_RUNTIME
+	.runtime_suspend = kbase_device_runtime_suspend,
+	.runtime_resume = kbase_device_runtime_resume,
+	.runtime_idle = kbase_device_runtime_idle,
+#endif /* KBASE_PM_RUNTIME */
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id kbase_dt_ids[] = {
+	{ .compatible = "arm,malit6xx" },
+	{ .compatible = "arm,mali-midgard" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, kbase_dt_ids);
+#endif
+
+static struct platform_driver kbase_platform_driver = {
+	.probe = kbase_platform_device_probe,
+	.remove = kbase_platform_device_remove,
+	.driver = {
+		   .name = kbase_drv_name,
+		   .owner = THIS_MODULE,
+		   .pm = &kbase_pm_ops,
+		   .of_match_table = of_match_ptr(kbase_dt_ids),
+	},
+};
+
+/*
+ * The driver will not provide a shortcut to create the Mali platform device
+ * anymore when using Device Tree.
+ */
+#ifdef CONFIG_OF
+module_platform_driver(kbase_platform_driver);
+#else
+
+static int __init kbase_driver_init(void)
+{
+	int ret;
+
+	ret = kbase_platform_register();
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&kbase_platform_driver);
+
+	if (ret)
+		kbase_platform_unregister();
+
+	return ret;
+}
+
+static void __exit kbase_driver_exit(void)
+{
+	platform_driver_unregister(&kbase_platform_driver);
+	kbase_platform_unregister();
+}
+
+module_init(kbase_driver_init);
+module_exit(kbase_driver_exit);
+
+#endif /* CONFIG_OF */
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
+		__stringify(BASE_UK_VERSION_MAJOR) "." \
+		__stringify(BASE_UK_VERSION_MINOR) ")");
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE)
+#define CREATE_TRACE_POINTS
+#endif
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+/* Create the trace points (otherwise we just get code to call a tracepoint) */
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_on);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_off);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change);
+
+void kbase_trace_mali_pm_status(u32 event, u64 value)
+{
+	trace_mali_pm_status(event, value);
+}
+
+void kbase_trace_mali_pm_power_off(u32 event, u64 value)
+{
+	trace_mali_pm_power_off(event, value);
+}
+
+void kbase_trace_mali_pm_power_on(u32 event, u64 value)
+{
+	trace_mali_pm_power_on(event, value);
+}
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id)
+{
+	trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? kctx->pid : 0), atom_id);
+}
+
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value)
+{
+	trace_mali_page_fault_insert_pages(event, value);
+}
+
+void kbase_trace_mali_mmu_as_in_use(int event)
+{
+	trace_mali_mmu_as_in_use(event);
+}
+
+void kbase_trace_mali_mmu_as_released(int event)
+{
+	trace_mali_mmu_as_released(event);
+}
+
+void kbase_trace_mali_total_alloc_pages_change(long long int event)
+{
+	trace_mali_total_alloc_pages_change(event);
+}
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+#ifdef CONFIG_MALI_SYSTEM_TRACE
+#include "mali_linux_kbase_trace.h"
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c
new file mode 100644
index 0000000000000..bda05602de5e4
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c
@@ -0,0 +1,210 @@
+/*
+ *
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_ctx_sched.h"
+
+int kbase_ctx_sched_init(struct kbase_device *kbdev)
+{
+	int as_present = (1U << kbdev->nr_hw_address_spaces) - 1;
+
+	/* These two must be recalculated if nr_hw_address_spaces changes
+	 * (e.g. for HW workarounds) */
+	kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+		bool use_workaround;
+
+		use_workaround = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE;
+		if (use_workaround) {
+			dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only");
+			kbdev->nr_user_address_spaces = 1;
+		}
+	}
+
+	kbdev->as_free = as_present; /* All ASs initially free */
+
+	memset(kbdev->as_to_kctx, 0, sizeof(kbdev->as_to_kctx));
+
+	return 0;
+}
+
+void kbase_ctx_sched_term(struct kbase_device *kbdev)
+{
+	s8 i;
+
+	/* Sanity checks */
+	for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) {
+		WARN_ON(kbdev->as_to_kctx[i] != NULL);
+		WARN_ON(!(kbdev->as_free & (1u << i)));
+	}
+}
+
+/* kbasep_ctx_sched_find_as_for_ctx - Find a free address space
+ *
+ * @kbdev: The context for which to find a free address space
+ *
+ * Return: A valid AS if successful, otherwise KBASEP_AS_NR_INVALID
+ *
+ * This function returns an address space available for use. It would prefer
+ * returning an AS that has been previously assigned to the context to
+ * avoid having to reprogram the MMU.
+ */
+static int kbasep_ctx_sched_find_as_for_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+	int free_as;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* First check if the previously assigned AS is available */
+	if ((kctx->as_nr != KBASEP_AS_NR_INVALID) &&
+			(kbdev->as_free & (1u << kctx->as_nr)))
+		return kctx->as_nr;
+
+	/* The previously assigned AS was taken, we'll be returning any free
+	 * AS at this point.
+	 */
+	free_as = ffs(kbdev->as_free) - 1;
+	if (free_as >= 0 && free_as < kbdev->nr_hw_address_spaces)
+		return free_as;
+
+	return KBASEP_AS_NR_INVALID;
+}
+
+int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(!kbdev->pm.backend.gpu_powered);
+
+	if (atomic_inc_return(&kctx->refcount) == 1) {
+		int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx);
+
+		if (free_as != KBASEP_AS_NR_INVALID) {
+			kbdev->as_free &= ~(1u << free_as);
+			/* Only program the MMU if the context has not been
+			 * assigned the same address space before.
+			 */
+			if (free_as != kctx->as_nr) {
+				struct kbase_context *const prev_kctx =
+					kbdev->as_to_kctx[free_as];
+
+				if (prev_kctx) {
+					WARN_ON(atomic_read(&prev_kctx->refcount) != 0);
+					kbase_mmu_disable(prev_kctx);
+					prev_kctx->as_nr = KBASEP_AS_NR_INVALID;
+				}
+
+				kctx->as_nr = free_as;
+				kbdev->as_to_kctx[free_as] = kctx;
+				kbase_mmu_update(kbdev, &kctx->mmu,
+					kctx->as_nr);
+			}
+		} else {
+			atomic_dec(&kctx->refcount);
+
+			/* Failed to find an available address space, we must
+			 * be returning an error at this point.
+			 */
+			WARN_ON(kctx->as_nr != KBASEP_AS_NR_INVALID);
+		}
+	}
+
+	return kctx->as_nr;
+}
+
+void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	WARN_ON(atomic_read(&kctx->refcount) == 0);
+	WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID);
+	WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx);
+
+	atomic_inc(&kctx->refcount);
+}
+
+void kbase_ctx_sched_release_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (atomic_dec_return(&kctx->refcount) == 0)
+		kbdev->as_free |= (1u << kctx->as_nr);
+}
+
+void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(atomic_read(&kctx->refcount) != 0);
+
+	if (kctx->as_nr != KBASEP_AS_NR_INVALID) {
+		if (kbdev->pm.backend.gpu_powered)
+			kbase_mmu_disable(kctx);
+
+		kbdev->as_to_kctx[kctx->as_nr] = NULL;
+		kctx->as_nr = KBASEP_AS_NR_INVALID;
+	}
+}
+
+void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev)
+{
+	s8 i;
+
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(!kbdev->pm.backend.gpu_powered);
+
+	for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) {
+		struct kbase_context *kctx;
+
+		kctx = kbdev->as_to_kctx[i];
+		if (kctx) {
+			if (atomic_read(&kctx->refcount)) {
+				WARN_ON(kctx->as_nr != i);
+
+				kbase_mmu_update(kbdev, &kctx->mmu,
+					kctx->as_nr);
+			} else {
+				/* This context might have been assigned an
+				 * AS before, clear it.
+				 */
+				kbdev->as_to_kctx[kctx->as_nr] = NULL;
+				kctx->as_nr = KBASEP_AS_NR_INVALID;
+			}
+		} else {
+			kbase_mmu_disable_as(kbdev, i);
+		}
+	}
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h
new file mode 100644
index 0000000000000..ab57a0dc1ca8b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h
@@ -0,0 +1,135 @@
+/*
+ *
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_CTX_SCHED_H_
+#define _KBASE_CTX_SCHED_H_
+
+#include <mali_kbase.h>
+
+/**
+ * The Context Scheduler manages address space assignment and reference
+ * counting to kbase_context. The interface has been designed to minimise
+ * interactions between the Job Scheduler and Power Management/MMU to support
+ * the existing Job Scheduler interface.
+ *
+ * The initial implementation of the Context Scheduler does not schedule
+ * contexts. Instead it relies on the Job Scheduler to make decisions of
+ * when to schedule/evict contexts if address spaces are starved. In the
+ * future, once an interface between the CS and JS has been devised to
+ * provide enough information about how each context is consuming GPU resources,
+ * those decisions can be made in the CS itself, thereby reducing duplicated
+ * code.
+ */
+
+/**
+ * kbase_ctx_sched_init - Initialise the context scheduler
+ * @kbdev: The device for which the context scheduler needs to be initialised
+ *
+ * This must be called during device initialisation. The number of hardware
+ * address spaces must already be established before calling this function.
+ *
+ * Return: 0 for success, otherwise failure
+ */
+int kbase_ctx_sched_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_ctx_sched_term - Terminate the context scheduler
+ * @kbdev: The device for which the context scheduler needs to be terminated
+ *
+ * This must be called during device termination after all contexts have been
+ * destroyed.
+ */
+void kbase_ctx_sched_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context
+ * @kctx: The context to which to retain a reference
+ *
+ * This function should be called whenever an address space should be assigned
+ * to a context and programmed onto the MMU. It should typically be called
+ * when jobs are ready to be submitted to the GPU.
+ *
+ * It can be called as many times as necessary. The address space will be
+ * assigned to the context for as long as there is a reference to said context.
+ *
+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
+ * held whilst calling this function.
+ *
+ * Return: The address space that the context has been assigned to or
+ *         KBASEP_AS_NR_INVALID if no address space was available.
+ */
+int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx);
+
+/**
+ * kbase_ctx_sched_retain_ctx_refcount
+ * @kctx: The context to which to retain a reference
+ *
+ * This function only retains a reference to the context. It must be called
+ * only when the context already has a reference.
+ *
+ * This is typically called inside an atomic session where we know the context
+ * is already scheduled in but want to take an extra reference to ensure that
+ * it doesn't get descheduled.
+ *
+ * The kbase_device::hwaccess_lock must be held whilst calling this function
+ */
+void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx);
+
+/**
+ * kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context
+ * @kctx: The context from which to release a reference
+ *
+ * This function should be called whenever an address space could be unassigned
+ * from a context. When there are no more references to said context, the
+ * address space previously assigned to this context shall be reassigned to
+ * other contexts as needed.
+ *
+ * The kbase_device::hwaccess_lock must be held whilst calling this function
+ */
+void kbase_ctx_sched_release_ctx(struct kbase_context *kctx);
+
+/**
+ * kbase_ctx_sched_remove_ctx - Unassign previously assigned address space
+ * @kctx: The context to be removed
+ *
+ * This function should be called when a context is being destroyed. The
+ * context must no longer have any reference. If it has been assigned an
+ * address space before then the AS will be unprogrammed.
+ *
+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
+ * held whilst calling this function.
+ */
+void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx);
+
+/**
+ * kbase_ctx_sched_restore_all_as - Reprogram all address spaces
+ * @kbdev: The device for which address spaces to be reprogrammed
+ *
+ * This function shall reprogram all address spaces previously assigned to
+ * contexts. It can be used after the GPU is reset.
+ *
+ * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be
+ * held whilst calling this function.
+ */
+void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev);
+
+#endif /* _KBASE_CTX_SCHED_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug.c b/drivers/gpu/arm/midgard/mali_kbase_debug.c
new file mode 100644
index 0000000000000..118f787fb74cc
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug.c
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = {
+	NULL,
+	NULL
+};
+
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param)
+{
+	kbasep_debug_assert_registered_cb.func = func;
+	kbasep_debug_assert_registered_cb.param = param;
+}
+
+void kbasep_debug_assert_call_hook(void)
+{
+	if (kbasep_debug_assert_registered_cb.func != NULL)
+		kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param);
+}
+KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook);
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug.h b/drivers/gpu/arm/midgard/mali_kbase_debug.h
new file mode 100644
index 0000000000000..2fdb72d943e47
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug.h
@@ -0,0 +1,169 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_H
+#define _KBASE_DEBUG_H
+
+#include <linux/bug.h>
+
+/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */
+#define KBASE_DEBUG_SKIP_TRACE 0
+
+/** @brief If different from 0, the trace will only contain the file and line. */
+#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0
+
+/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */
+#ifndef KBASE_DEBUG_DISABLE_ASSERTS
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_DISABLE_ASSERTS 0
+#else
+#define KBASE_DEBUG_DISABLE_ASSERTS 1
+#endif
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */
+typedef void (kbase_debug_assert_hook) (void *);
+
+struct kbasep_debug_assert_cb {
+	kbase_debug_assert_hook *func;
+	void *param;
+};
+
+/**
+ * @def KBASEP_DEBUG_PRINT_TRACE
+ * @brief Private macro containing the format of the trace to display before every message
+ * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME
+ */
+#if !KBASE_DEBUG_SKIP_TRACE
+#define KBASEP_DEBUG_PRINT_TRACE \
+		"In file: " __FILE__ " line: " CSTD_STR2(__LINE__)
+#if !KBASE_DEBUG_SKIP_FUNCTION_NAME
+#define KBASEP_DEBUG_PRINT_FUNCTION __func__
+#else
+#define KBASEP_DEBUG_PRINT_FUNCTION ""
+#endif
+#else
+#define KBASEP_DEBUG_PRINT_TRACE ""
+#endif
+
+/**
+ * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)
+ * @brief (Private) system printing function associated to the @ref KBASE_DEBUG_ASSERT_MSG event.
+ * @param trace location in the code from where the message is printed
+ * @param function function from where the message is printed
+ * @param ... Format string followed by format arguments.
+ * @note function parameter cannot be concatenated with other strings
+ */
+/* Select the correct system output function*/
+#ifdef CONFIG_MALI_DEBUG
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\
+		do { \
+			pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\
+			pr_err(__VA_ARGS__);\
+			pr_err("\n");\
+		} while (false)
+#else
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP()
+#endif
+
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook()
+#else
+#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP()
+#endif
+
+/**
+ * @def KBASE_DEBUG_ASSERT(expr)
+ * @brief Calls @ref KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false
+ *
+ * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ *
+ * @param expr Boolean expression
+ */
+#define KBASE_DEBUG_ASSERT(expr) \
+	KBASE_DEBUG_ASSERT_MSG(expr, #expr)
+
+#if KBASE_DEBUG_DISABLE_ASSERTS
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
+#else
+	/**
+	 * @def KBASE_DEBUG_ASSERT_MSG(expr, ...)
+	 * @brief Calls @ref KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false
+	 *
+	 * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+	 *
+	 * @param expr Boolean expression
+	 * @param ...  Message to display when @a expr is false, as a format string followed by format arguments.
+	 */
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \
+		do { \
+			if (!(expr)) { \
+				KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\
+				KBASE_CALL_ASSERT_HOOK();\
+				BUG();\
+			} \
+		} while (false)
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/**
+ * @def KBASE_DEBUG_CODE( X )
+ * @brief Executes the code inside the macro only in debug mode
+ *
+ * @param X Code to compile only in debug mode.
+ */
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_CODE(X) X
+#else
+#define KBASE_DEBUG_CODE(X) CSTD_NOP()
+#endif				/* CONFIG_MALI_DEBUG */
+
+/** @} */
+
+/**
+ * @brief Register a function to call on ASSERT
+ *
+ * Such functions will \b only be called during Debug mode, and for debugging
+ * features \b only. Do not rely on them to be called in general use.
+ *
+ * To disable the hook, supply NULL to \a func.
+ *
+ * @note This function is not thread-safe, and should only be used to
+ * register/deregister once in the module's lifetime.
+ *
+ * @param[in] func the function to call when an assert is triggered.
+ * @param[in] param the parameter to pass to \a func when calling it
+ */
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param);
+
+/**
+ * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook()
+ *
+ * @note This function is not thread-safe with respect to multiple threads
+ * registering functions and parameters with
+ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the
+ * responsibility of the registered hook.
+ */
+void kbasep_debug_assert_call_hook(void);
+
+#endif				/* _KBASE_DEBUG_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
new file mode 100644
index 0000000000000..d2c09d6658f21
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
@@ -0,0 +1,504 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <linux/spinlock.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+	bool             ret;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	ret = !list_empty(event_list);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return ret;
+}
+
+static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head *event_list = &kctx->kbdev->job_fault_event_list;
+	struct base_job_fault_event *event;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		return true;
+	}
+	list_for_each_entry(event, event_list, head) {
+		if (event->katom->kctx == kctx) {
+			spin_unlock_irqrestore(&kbdev->job_fault_event_lock,
+					flags);
+			return false;
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+	return true;
+}
+
+/* wait until the fault happen and copy the event */
+static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
+		struct base_job_fault_event *event)
+{
+	struct list_head            *event_list = &kbdev->job_fault_event_list;
+	struct base_job_fault_event *event_in;
+	unsigned long               flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		if (wait_event_interruptible(kbdev->job_fault_wq,
+				 kbase_is_job_fault_event_pending(kbdev)))
+			return -ERESTARTSYS;
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+
+	event_in = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	event->event_code = event_in->event_code;
+	event->katom = event_in->katom;
+
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	return 0;
+
+}
+
+/* remove the event from the queue */
+static struct base_job_fault_event *kbase_job_fault_event_dequeue(
+		struct kbase_device *kbdev, struct list_head *event_list)
+{
+	struct base_job_fault_event *event;
+
+	event = list_entry(event_list->next,
+			struct base_job_fault_event, head);
+	list_del(event_list->next);
+
+	return event;
+
+}
+
+/* Remove all the following atoms after the failed atom in the same context
+ * Call the postponed bottom half of job done.
+ * Then, this context could be rescheduled.
+ */
+static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
+{
+	struct list_head *event_list = &kctx->job_fault_resume_event_list;
+
+	while (!list_empty(event_list)) {
+		struct base_job_fault_event *event;
+
+		event = kbase_job_fault_event_dequeue(kctx->kbdev,
+				&kctx->job_fault_resume_event_list);
+		kbase_jd_done_worker(&event->katom->work);
+	}
+
+}
+
+/* Remove all the failed atoms that belong to different contexts
+ * Resume all the contexts that were suspend due to failed job
+ */
+static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
+{
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	while (!list_empty(event_list)) {
+		kbase_job_fault_event_dequeue(kbdev, event_list);
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		wake_up(&kbdev->job_fault_resume_wq);
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+}
+
+static void kbase_job_fault_resume_worker(struct work_struct *data)
+{
+	struct base_job_fault_event *event = container_of(data,
+			struct base_job_fault_event, job_fault_work);
+	struct kbase_context *kctx;
+	struct kbase_jd_atom *katom;
+
+	katom = event->katom;
+	kctx = katom->kctx;
+
+	dev_info(kctx->kbdev->dev, "Job dumping wait\n");
+
+	/* When it was waked up, it need to check if queue is empty or the
+	 * failed atom belongs to different context. If yes, wake up. Both
+	 * of them mean the failed job has been dumped. Please note, it
+	 * should never happen that the job_fault_event_list has the two
+	 * atoms belong to the same context.
+	 */
+	wait_event(kctx->kbdev->job_fault_resume_wq,
+			 kbase_ctx_has_no_event_pending(kctx));
+
+	atomic_set(&kctx->job_fault_count, 0);
+	kbase_jd_done_worker(&katom->work);
+
+	/* In case the following atoms were scheduled during failed job dump
+	 * the job_done_worker was held. We need to rerun it after the dump
+	 * was finished
+	 */
+	kbase_job_fault_resume_event_cleanup(kctx);
+
+	dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n");
+}
+
+static struct base_job_fault_event *kbase_job_fault_event_queue(
+		struct list_head *event_list,
+		struct kbase_jd_atom *atom,
+		u32 completion_code)
+{
+	struct base_job_fault_event *event;
+
+	event = &atom->fault_event;
+
+	event->katom = atom;
+	event->event_code = completion_code;
+
+	list_add_tail(&event->head, event_list);
+
+	return event;
+
+}
+
+static void kbase_job_fault_event_post(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, u32 completion_code)
+{
+	struct base_job_fault_event *event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list,
+				katom, completion_code);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
+	wake_up_interruptible(&kbdev->job_fault_wq);
+
+	INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker);
+	queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work);
+
+	dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d",
+			katom->kctx->tgid, katom->kctx->id);
+
+}
+
+/*
+ * This function will process the job fault
+ * Get the register copy
+ * Send the failed job dump event
+ * Create a Wait queue to wait until the job dump finish
+ */
+
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Check if dumping is in the process
+	 * only one atom of each context can be dumped at the same time
+	 * If the atom belongs to different context, it can be dumped
+	 */
+	if (atomic_read(&kctx->job_fault_count) > 0) {
+		kbase_job_fault_event_queue(
+				&kctx->job_fault_resume_event_list,
+				katom, completion_code);
+		dev_info(kctx->kbdev->dev, "queue:%d\n",
+				kbase_jd_atom_id(kctx, katom));
+		return true;
+	}
+
+	if (kctx->kbdev->job_fault_debug == true) {
+
+		if (completion_code != BASE_JD_EVENT_DONE) {
+
+			if (kbase_job_fault_get_reg_snapshot(kctx) == false) {
+				dev_warn(kctx->kbdev->dev, "get reg dump failed\n");
+				return false;
+			}
+
+			kbase_job_fault_event_post(kctx->kbdev, katom,
+					completion_code);
+			atomic_inc(&kctx->job_fault_count);
+			dev_info(kctx->kbdev->dev, "post:%d\n",
+					kbase_jd_atom_id(kctx, katom));
+			return true;
+
+		}
+	}
+	return false;
+
+}
+
+static int debug_job_fault_show(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+	struct kbase_context *kctx = event->katom->kctx;
+	int i;
+
+	dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d",
+			kctx->tgid, kctx->id, event->reg_offset);
+
+	if (kctx->reg_dump == NULL) {
+		dev_warn(kbdev->dev, "reg dump is NULL");
+		return -1;
+	}
+
+	if (kctx->reg_dump[event->reg_offset] ==
+			REGISTER_DUMP_TERMINATION_FLAG) {
+		/* Return the error here to stop the read. And the
+		 * following next() will not be called. The stop can
+		 * get the real event resource and release it
+		 */
+		return -1;
+	}
+
+	if (event->reg_offset == 0)
+		seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id);
+
+	for (i = 0; i < 50; i++) {
+		if (kctx->reg_dump[event->reg_offset] ==
+				REGISTER_DUMP_TERMINATION_FLAG) {
+			break;
+		}
+		seq_printf(m, "%08x: %08x\n",
+				kctx->reg_dump[event->reg_offset],
+				kctx->reg_dump[1+event->reg_offset]);
+		event->reg_offset += 2;
+
+	}
+
+
+	return 0;
+}
+static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event = (struct base_job_fault_event *)v;
+
+	dev_info(kbdev->dev, "debug job fault seq next:%d, %d",
+			event->reg_offset, (int)*pos);
+
+	return event;
+}
+
+static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
+{
+	struct kbase_device *kbdev = m->private;
+	struct base_job_fault_event *event;
+
+	dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos);
+
+	/* The condition is trick here. It needs make sure the
+	 * fault hasn't happened and the dumping hasn't been started,
+	 * or the dumping has finished
+	 */
+	if (*pos == 0) {
+		event = kmalloc(sizeof(*event), GFP_KERNEL);
+		if (!event)
+			return NULL;
+		event->reg_offset = 0;
+		if (kbase_job_fault_event_wait(kbdev, event)) {
+			kfree(event);
+			return NULL;
+		}
+
+		/* The cache flush workaround is called in bottom half of
+		 * job done but we delayed it. Now we should clean cache
+		 * earlier. Then the GPU memory dump should be correct.
+		 */
+		kbase_backend_cacheclean(kbdev, event->katom);
+	} else
+		return NULL;
+
+	return event;
+}
+
+static void debug_job_fault_stop(struct seq_file *m, void *v)
+{
+	struct kbase_device *kbdev = m->private;
+
+	/* here we wake up the kbase_jd_done_worker after stop, it needs
+	 * get the memory dump before the register dump in debug daemon,
+	 * otherwise, the memory dump may be incorrect.
+	 */
+
+	if (v != NULL) {
+		kfree(v);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 1");
+
+	} else {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+		if (!list_empty(&kbdev->job_fault_event_list)) {
+			kbase_job_fault_event_dequeue(kbdev,
+				&kbdev->job_fault_event_list);
+			wake_up(&kbdev->job_fault_resume_wq);
+		}
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		dev_info(kbdev->dev, "debug job fault seq stop stage 2");
+	}
+
+}
+
+static const struct seq_operations ops = {
+	.start = debug_job_fault_start,
+	.next = debug_job_fault_next,
+	.stop = debug_job_fault_stop,
+	.show = debug_job_fault_show,
+};
+
+static int debug_job_fault_open(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_open(file, &ops);
+
+	((struct seq_file *)file->private_data)->private = kbdev;
+	dev_info(kbdev->dev, "debug job fault seq open");
+
+	kbdev->job_fault_debug = true;
+
+	return 0;
+
+}
+
+static int debug_job_fault_release(struct inode *in, struct file *file)
+{
+	struct kbase_device *kbdev = in->i_private;
+
+	seq_release(in, file);
+
+	kbdev->job_fault_debug = false;
+
+	/* Clean the unprocessed job fault. After that, all the suspended
+	 * contexts could be rescheduled.
+	 */
+	kbase_job_fault_event_cleanup(kbdev);
+
+	dev_info(kbdev->dev, "debug job fault seq close");
+
+	return 0;
+}
+
+static const struct file_operations kbasep_debug_job_fault_fops = {
+	.open = debug_job_fault_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = debug_job_fault_release,
+};
+
+/*
+ *  Initialize debugfs entry for job fault dump
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("job_fault", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_debug_job_fault_fops);
+}
+
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+
+	INIT_LIST_HEAD(&kbdev->job_fault_event_list);
+
+	init_waitqueue_head(&(kbdev->job_fault_wq));
+	init_waitqueue_head(&(kbdev->job_fault_resume_wq));
+	spin_lock_init(&kbdev->job_fault_event_lock);
+
+	kbdev->job_fault_resume_workq = alloc_workqueue(
+			"kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
+	if (!kbdev->job_fault_resume_workq)
+		return -ENOMEM;
+
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+/*
+ * Release the relevant resource per device
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->job_fault_resume_workq);
+}
+
+
+/*
+ *  Initialize the relevant data structure per context
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
+{
+
+	/* We need allocate double size register range
+	 * Because this memory will keep the register address and value
+	 */
+	kctx->reg_dump = vmalloc(0x4000 * 2);
+	if (kctx->reg_dump == NULL)
+		return;
+
+	if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
+		vfree(kctx->reg_dump);
+		kctx->reg_dump = NULL;
+	}
+	INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
+	atomic_set(&kctx->job_fault_count, 0);
+
+}
+
+/*
+ *  release the relevant resource per context
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
+{
+	vfree(kctx->reg_dump);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+	kbdev->job_fault_debug = false;
+
+	return 0;
+}
+
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
new file mode 100644
index 0000000000000..f5ab0a44c1d41
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
@@ -0,0 +1,101 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_DEBUG_JOB_FAULT_H
+#define _KBASE_DEBUG_JOB_FAULT_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF
+
+/**
+ * kbase_debug_job_fault_dev_init - Create the fault event wait queue
+ *		per device and initialize the required lists.
+ * @kbdev:	Device pointer
+ *
+ * Return: Zero on success or a negative error code.
+ */
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_dev_term - Clean up resources created in
+ *		kbase_debug_job_fault_dev_init.
+ * @kbdev:	Device pointer
+ */
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_context_init - Initialize the relevant
+ *		data structure per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_init(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_context_term - Release the relevant
+ *		resource per context
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
+
+/**
+ * kbase_debug_job_fault_process - Process the failed job.
+ *      It will send a event and wake up the job fault waiting queue
+ *      Then create a work queue to wait for job dump finish
+ *      This function should be called in the interrupt handler and before
+ *      jd_done that make sure the jd_done_worker will be delayed until the
+ *      job dump finish
+ * @katom: The failed atom pointer
+ * @completion_code: the job status
+ * @return true if dump is going on
+ */
+bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
+		u32 completion_code);
+
+
+/**
+ * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers
+ *      address during the job fault process, the relevant registers will
+ *      be saved when a job fault happen
+ * @kctx: KBase context pointer
+ * @reg_range: Maximum register address space
+ * @return true if initializing successfully
+ */
+bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
+		int reg_range);
+
+/**
+ * kbase_job_fault_get_reg_snapshot - Read the interested registers for
+ *      failed job dump
+ * @kctx: KBase context pointer
+ * @return true if getting registers successfully
+ */
+bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx);
+
+#endif  /*_KBASE_DEBUG_JOB_FAULT_H*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
new file mode 100644
index 0000000000000..ee4552913b7af
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
@@ -0,0 +1,305 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Debugfs interface to dump the memory visible to the GPU
+ */
+
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase.h"
+
+#include <linux/list.h>
+#include <linux/file.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+struct debug_mem_mapping {
+	struct list_head node;
+
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long flags;
+
+	u64 start_pfn;
+	size_t nr_pages;
+};
+
+struct debug_mem_data {
+	struct list_head mapping_list;
+	struct kbase_context *kctx;
+};
+
+struct debug_mem_seq_off {
+	struct list_head *lh;
+	size_t offset;
+};
+
+static void *debug_mem_start(struct seq_file *m, loff_t *_pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data;
+	struct debug_mem_mapping *map;
+	loff_t pos = *_pos;
+
+	list_for_each_entry(map, &mem_data->mapping_list, node) {
+		if (pos >= map->nr_pages) {
+			pos -= map->nr_pages;
+		} else {
+			data = kmalloc(sizeof(*data), GFP_KERNEL);
+			if (!data)
+				return NULL;
+			data->lh = &map->node;
+			data->offset = pos;
+			return data;
+		}
+	}
+
+	/* Beyond the end */
+	return NULL;
+}
+
+static void debug_mem_stop(struct seq_file *m, void *v)
+{
+	kfree(v);
+}
+
+static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	if (data->offset < map->nr_pages - 1) {
+		data->offset++;
+		++*pos;
+		return data;
+	}
+
+	if (list_is_last(data->lh, &mem_data->mapping_list)) {
+		kfree(data);
+		return NULL;
+	}
+
+	data->lh = data->lh->next;
+	data->offset = 0;
+	++*pos;
+
+	return data;
+}
+
+static int debug_mem_show(struct seq_file *m, void *v)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+	int i, j;
+	struct page *page;
+	uint32_t *mapping;
+	pgprot_t prot = PAGE_KERNEL;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	kbase_gpu_vm_lock(mem_data->kctx);
+
+	if (data->offset >= map->alloc->nents) {
+		seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn +
+				data->offset) << PAGE_SHIFT);
+		goto out;
+	}
+
+	if (!(map->flags & KBASE_REG_CPU_CACHED))
+		prot = pgprot_writecombine(prot);
+
+	page = as_page(map->alloc->pages[data->offset]);
+	mapping = vmap(&page, 1, VM_MAP, prot);
+	if (!mapping)
+		goto out;
+
+	for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
+		seq_printf(m, "%016llx:", i + ((map->start_pfn +
+				data->offset) << PAGE_SHIFT));
+
+		for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
+			seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
+		seq_putc(m, '\n');
+	}
+
+	vunmap(mapping);
+
+	seq_putc(m, '\n');
+
+out:
+	kbase_gpu_vm_unlock(mem_data->kctx);
+	return 0;
+}
+
+static const struct seq_operations ops = {
+	.start = debug_mem_start,
+	.next = debug_mem_next,
+	.stop = debug_mem_stop,
+	.show = debug_mem_show,
+};
+
+static int debug_mem_zone_open(struct rb_root *rbtree,
+						struct debug_mem_data *mem_data)
+{
+	int ret = 0;
+	struct rb_node *p;
+	struct kbase_va_region *reg;
+	struct debug_mem_mapping *mapping;
+
+	for (p = rb_first(rbtree); p; p = rb_next(p)) {
+		reg = rb_entry(p, struct kbase_va_region, rblink);
+
+		if (reg->gpu_alloc == NULL)
+			/* Empty region - ignore */
+			continue;
+
+		mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+		if (!mapping) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		mapping->start_pfn = reg->start_pfn;
+		mapping->nr_pages = reg->nr_pages;
+		mapping->flags = reg->flags;
+		list_add_tail(&mapping->node, &mem_data->mapping_list);
+	}
+
+out:
+	return ret;
+}
+
+static int debug_mem_open(struct inode *i, struct file *file)
+{
+	struct file *kctx_file = i->i_private;
+	struct kbase_context *kctx = kctx_file->private_data;
+	struct debug_mem_data *mem_data;
+	int ret;
+
+	ret = seq_open(file, &ops);
+	if (ret)
+		return ret;
+
+	mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
+	if (!mem_data) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	mem_data->kctx = kctx;
+
+	INIT_LIST_HEAD(&mem_data->mapping_list);
+
+	get_file(kctx_file);
+
+	kbase_gpu_vm_lock(kctx);
+
+	ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data);
+	if (0 != ret) {
+		kbase_gpu_vm_unlock(kctx);
+		goto out;
+	}
+
+	ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data);
+	if (0 != ret) {
+		kbase_gpu_vm_unlock(kctx);
+		goto out;
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	((struct seq_file *)file->private_data)->private = mem_data;
+
+	return 0;
+
+out:
+	if (mem_data) {
+		while (!list_empty(&mem_data->mapping_list)) {
+			struct debug_mem_mapping *mapping;
+
+			mapping = list_first_entry(&mem_data->mapping_list,
+					struct debug_mem_mapping, node);
+			kbase_mem_phy_alloc_put(mapping->alloc);
+			list_del(&mapping->node);
+			kfree(mapping);
+		}
+		fput(kctx_file);
+		kfree(mem_data);
+	}
+	seq_release(i, file);
+	return ret;
+}
+
+static int debug_mem_release(struct inode *inode, struct file *file)
+{
+	struct file *kctx_file = inode->i_private;
+	struct seq_file *sfile = file->private_data;
+	struct debug_mem_data *mem_data = sfile->private;
+	struct debug_mem_mapping *mapping;
+
+	seq_release(inode, file);
+
+	while (!list_empty(&mem_data->mapping_list)) {
+		mapping = list_first_entry(&mem_data->mapping_list,
+				struct debug_mem_mapping, node);
+		kbase_mem_phy_alloc_put(mapping->alloc);
+		list_del(&mapping->node);
+		kfree(mapping);
+	}
+
+	kfree(mem_data);
+
+	fput(kctx_file);
+
+	return 0;
+}
+
+static const struct file_operations kbase_debug_mem_view_fops = {
+	.open = debug_mem_open,
+	.release = debug_mem_release,
+	.read = seq_read,
+	.llseek = seq_lseek
+};
+
+/**
+ * kbase_debug_mem_view_init - Initialise the mem_view sysfs file
+ * @kctx_file: The /dev/mali0 file instance for the context
+ *
+ * This function creates a "mem_view" file which can be used to get a view of
+ * the context's memory as the GPU sees it (i.e. using the GPU's page tables).
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_view_init(struct file *kctx_file)
+{
+	struct kbase_context *kctx = kctx_file->private_data;
+
+	debugfs_create_file("mem_view", S_IRUSR, kctx->kctx_dentry, kctx_file,
+			&kbase_debug_mem_view_fops);
+}
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
new file mode 100644
index 0000000000000..886ca9448cf57
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
@@ -0,0 +1,30 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_DEBUG_MEM_VIEW_H
+#define _KBASE_DEBUG_MEM_VIEW_H
+
+#include <mali_kbase.h>
+
+void kbase_debug_mem_view_init(struct file *kctx_file);
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h
new file mode 100644
index 0000000000000..4adfe35678cfa
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -0,0 +1,2233 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_defs.h
+ *
+ * Defintions (types, defines, etcs) common to Kbase. They are placed here to
+ * allow the hierarchy of header files to work.
+ */
+
+#ifndef _KBASE_DEFS_H_
+#define _KBASE_DEFS_H_
+
+#include <mali_kbase_config.h>
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_kbase_mem_lowlevel.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_instr_defs.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_gpuprops_types.h>
+#include <protected_mode_switcher.h>
+
+
+#include <linux/atomic.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/sizes.h>
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+#include <linux/bus_logger.h>
+#endif
+
+#if defined(CONFIG_SYNC)
+#include <sync.h>
+#else
+#include "mali_kbase_fence_defs.h"
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif				/* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_MALI_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_MALI_DEVFREQ */
+
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+#if defined(CONFIG_PM_RUNTIME) || \
+	(defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+#define KBASE_PM_RUNTIME 1
+#endif
+
+/** Enable SW tracing when set */
+#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE
+#define KBASE_TRACE_ENABLE 1
+#endif
+
+#ifndef KBASE_TRACE_ENABLE
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_TRACE_ENABLE 1
+#else
+#define KBASE_TRACE_ENABLE 0
+#endif				/* CONFIG_MALI_DEBUG */
+#endif				/* KBASE_TRACE_ENABLE */
+
+/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
+#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1
+
+/**
+ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware.
+ * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU
+ * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware
+ * before resetting.
+ */
+#define ZAP_TIMEOUT             1000
+
+/** Number of milliseconds before we time out on a GPU soft/hard reset */
+#define RESET_TIMEOUT           500
+
+/**
+ * Prevent soft-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * Therefore, soft stop may still be disabled due to HW issues.
+ *
+ * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0
+
+/**
+ * Prevent hard-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
+
+/**
+ * The maximum number of Job Slots to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of job slots.
+ */
+#define BASE_JM_MAX_NR_SLOTS        3
+
+/**
+ * The maximum number of Address Spaces to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of Address Spaces
+ */
+#define BASE_MAX_NR_AS              16
+
+/* mmu */
+#define MIDGARD_MMU_VA_BITS 48
+
+#define MIDGARD_MMU_LEVEL(x) (x)
+
+#define MIDGARD_MMU_TOPLEVEL    MIDGARD_MMU_LEVEL(0)
+
+#define MIDGARD_MMU_BOTTOMLEVEL MIDGARD_MMU_LEVEL(3)
+
+#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
+
+/** setting in kbase_context::as_nr that indicates it's invalid */
+#define KBASEP_AS_NR_INVALID     (-1)
+
+#define KBASE_LOCK_REGION_MAX_SIZE (63)
+#define KBASE_LOCK_REGION_MIN_SIZE (11)
+
+#define KBASE_TRACE_SIZE_LOG2 8	/* 256 entries */
+#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2)
+#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1)
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_hwaccess_defs.h"
+
+#define KBASEP_FORCE_REPLAY_DISABLED 0
+
+/* Maximum force replay limit when randomization is enabled */
+#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
+
+/* Maximum number of pages of memory that require a permanent mapping, per
+ * kbase_context
+ */
+#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((1024ul * 1024ul) >> \
+								PAGE_SHIFT)
+
+
+/** Atom has been previously soft-stoppped */
+#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
+/** Atom has been previously retried to execute */
+#define KBASE_KATOM_FLAGS_RERUN (1<<2)
+/* Atom submitted with JOB_CHAIN_FLAG bit set in JS_CONFIG_NEXT register, helps to
+ * disambiguate short-running job chains during soft/hard stopping of jobs
+ */
+#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
+/** Atom has been previously hard-stopped. */
+#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
+/** Atom has caused us to enter disjoint state */
+#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
+/* Atom blocked on cross-slot dependency */
+#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
+/* Atom has fail dependency on cross-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
+/* Atom is currently in the list of atoms blocked on cross-slot dependencies */
+#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9)
+/* Atom is currently holding a context reference */
+#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
+/* Atom requires GPU to be in protected mode */
+#define KBASE_KATOM_FLAG_PROTECTED (1<<11)
+/* Atom has been stored in runnable_tree */
+#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12)
+/* Atom is waiting for L2 caches to power up in order to enter protected mode */
+#define KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT (1<<13)
+
+/* SW related flags about types of JS_COMMAND action
+ * NOTE: These must be masked off by JS_COMMAND_MASK */
+
+/** This command causes a disjoint event */
+#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100
+
+/** Bitmask of all SW related flags */
+#define JS_COMMAND_SW_BITS  (JS_COMMAND_SW_CAUSES_DISJOINT)
+
+#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
+#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks
+#endif
+
+/** Soft-stop command that causes a Disjoint event. This of course isn't
+ *  entirely masked off by JS_COMMAND_MASK */
+#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
+		(JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
+
+#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT
+
+/* Serialize atoms within a slot (ie only one atom per job slot) */
+#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0)
+/* Serialize atoms between slots (ie only one job slot running at any time) */
+#define KBASE_SERIALIZE_INTER_SLOT (1 << 1)
+/* Reset the GPU after each atom completion */
+#define KBASE_SERIALIZE_RESET (1 << 2)
+
+/* Forward declarations */
+struct kbase_context;
+struct kbase_device;
+struct kbase_as;
+struct kbase_mmu_setup;
+struct kbase_ipa_model_vinstr_data;
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * struct base_job_fault_event - keeps track of the atom which faulted or which
+ *                               completed after the faulty atom but before the
+ *                               debug data for faulty atom was dumped.
+ *
+ * @event_code:     event code for the atom, should != BASE_JD_EVENT_DONE for the
+ *                  atom which faulted.
+ * @katom:          pointer to the atom for which job fault occurred or which completed
+ *                  after the faulty atom.
+ * @job_fault_work: work item, queued only for the faulty atom, which waits for
+ *                  the dumping to get completed and then does the bottom half
+ *                  of job done for the atoms which followed the faulty atom.
+ * @head:           List head used to store the atom in the global list of faulty
+ *                  atoms or context specific list of atoms which got completed
+ *                  during the dump.
+ * @reg_offset:     offset of the register to be dumped next, only applicable for
+ *                  the faulty atom.
+ */
+struct base_job_fault_event {
+
+	u32 event_code;
+	struct kbase_jd_atom *katom;
+	struct work_struct job_fault_work;
+	struct list_head head;
+	int reg_offset;
+};
+
+#endif
+
+/**
+ * struct kbase_jd_atom_dependency - Contains the dependency info for an atom.
+ * @atom:          pointer to the dependee atom.
+ * @dep_type:      type of dependency on the dependee @atom, i.e. order or data
+ *                 dependency. BASE_JD_DEP_TYPE_INVALID indicates no dependency.
+ */
+struct kbase_jd_atom_dependency {
+	struct kbase_jd_atom *atom;
+	u8 dep_type;
+};
+
+/**
+ * struct kbase_io_access - holds information about 1 register access
+ *
+ * @addr: first bit indicates r/w (r=0, w=1)
+ * @value: value written or read
+ */
+struct kbase_io_access {
+	uintptr_t addr;
+	u32 value;
+};
+
+/**
+ * struct kbase_io_history - keeps track of all recent register accesses
+ *
+ * @enabled: true if register accesses are recorded, false otherwise
+ * @lock: spinlock protecting kbase_io_access array
+ * @count: number of registers read/written
+ * @size: number of elements in kbase_io_access array
+ * @buf: array of kbase_io_access
+ */
+struct kbase_io_history {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool enabled;
+#else
+	u32 enabled;
+#endif
+
+	spinlock_t lock;
+	size_t count;
+	u16 size;
+	struct kbase_io_access *buf;
+};
+
+/**
+ * kbase_jd_katom_dep_atom - Retrieves a read-only reference to the
+ *                           dependee atom.
+ * @dep:   pointer to the dependency info structure.
+ *
+ * Return: readonly reference to dependee atom.
+ */
+static inline const struct kbase_jd_atom *
+kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return (const struct kbase_jd_atom *)(dep->atom);
+}
+
+/**
+ * kbase_jd_katom_dep_type -  Retrieves the dependency type info
+ *
+ * @dep:   pointer to the dependency info structure.
+ *
+ * Return: the type of dependency there is on the dependee atom.
+ */
+static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
+{
+	LOCAL_ASSERT(dep != NULL);
+
+	return dep->dep_type;
+}
+
+/**
+ * kbase_jd_katom_dep_set - sets up the dependency info structure
+ *                          as per the values passed.
+ * @const_dep:    pointer to the dependency info structure to be setup.
+ * @a:            pointer to the dependee atom.
+ * @type:         type of dependency there is on the dependee atom.
+ */
+static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep,
+		struct kbase_jd_atom *a, u8 type)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = a;
+	dep->dep_type = type;
+}
+
+/**
+ * kbase_jd_katom_dep_clear - resets the dependency info structure
+ *
+ * @const_dep:    pointer to the dependency info structure to be setup.
+ */
+static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep)
+{
+	struct kbase_jd_atom_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+
+	dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+	dep->atom = NULL;
+	dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
+}
+
+/**
+ * enum kbase_atom_gpu_rb_state - The state of an atom, pertinent after it becomes
+ *                                runnable, with respect to job slot ringbuffer/fifo.
+ * @KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: Atom not currently present in slot fifo, which
+ *                                implies that either atom has not become runnable
+ *                                due to dependency or has completed the execution
+ *                                on GPU.
+ * @KBASE_ATOM_GPU_RB_WAITING_BLOCKED: Atom has been added to slot fifo but is blocked
+ *                                due to cross slot dependency, can't be submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: Atom has been added to slot fifo but
+ *                                is waiting for the completion of previously added atoms
+ *                                in current & other slots, as their protected mode
+ *                                requirements do not match with the current atom.
+ * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: Atom is in slot fifo and is
+ *                                waiting for completion of protected mode transition,
+ *                                needed before the atom is submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: Atom is in slot fifo but is waiting
+ *                                for the cores, which are needed to execute the job
+ *                                chain represented by the atom, to become available
+ * @KBASE_ATOM_GPU_RB_WAITING_AFFINITY: Atom is in slot fifo but is blocked on
+ *                                affinity due to rmu workaround for Hw issue 8987.
+ * @KBASE_ATOM_GPU_RB_READY:      Atom is in slot fifo and can be submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_SUBMITTED:  Atom is in slot fifo and has been submitted to GPU.
+ * @KBASE_ATOM_GPU_RB_RETURN_TO_JS: Atom must be returned to JS due to some failure,
+ *                                but only after the previously added atoms in fifo
+ *                                have completed or have also been returned to JS.
+ */
+enum kbase_atom_gpu_rb_state {
+	KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
+	KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV,
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION,
+	KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
+	KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
+	KBASE_ATOM_GPU_RB_READY,
+	KBASE_ATOM_GPU_RB_SUBMITTED,
+	KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1
+};
+
+/**
+ * enum kbase_atom_enter_protected_state - The state of an atom with respect to the
+ *                      preparation for GPU's entry into protected mode, becomes
+ *                      pertinent only after atom's state with respect to slot
+ *                      ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @KBASE_ATOM_ENTER_PROTECTED_CHECK:  Starting state. Check if there are any atoms
+ *                      currently submitted to GPU and protected mode transition is
+ *                      not already in progress.
+ * @KBASE_ATOM_ENTER_PROTECTED_VINSTR: Wait for vinstr to suspend before entry into
+ *                      protected mode.
+ * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation
+ *                      for the coherency change. L2 shall be powered down and GPU shall
+ *                      come out of fully coherent mode before entering protected mode.
+ * @KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: Prepare coherency change;
+ *                      for BASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on so that
+ *                      coherency register contains correct value when GPU enters
+ *                      protected mode.
+ * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; for BASE_HW_ISSUE_TGOX_R1_1234 check
+ *                      that L2 is powered up and switch GPU to protected mode.
+ */
+enum kbase_atom_enter_protected_state {
+	/**
+	 * NOTE: The integer value of this must match KBASE_ATOM_EXIT_PROTECTED_CHECK.
+	 */
+	KBASE_ATOM_ENTER_PROTECTED_CHECK = 0,
+	KBASE_ATOM_ENTER_PROTECTED_VINSTR,
+	KBASE_ATOM_ENTER_PROTECTED_IDLE_L2,
+	KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY,
+	KBASE_ATOM_ENTER_PROTECTED_FINISHED,
+};
+
+/**
+ * enum kbase_atom_exit_protected_state - The state of an atom with respect to the
+ *                      preparation for GPU's exit from protected mode, becomes
+ *                      pertinent only after atom's state with respect to slot
+ *                      ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
+ * @KBASE_ATOM_EXIT_PROTECTED_CHECK: Starting state. Check if there are any atoms
+ *                      currently submitted to GPU and protected mode transition is
+ *                      not already in progress.
+ * @KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation
+ *                      for the reset, as exiting protected mode requires a reset.
+ * @KBASE_ATOM_EXIT_PROTECTED_RESET: Issue the reset to trigger exit from protected mode
+ * @KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: End state, Wait for the reset to complete
+ */
+enum kbase_atom_exit_protected_state {
+	/**
+	 * NOTE: The integer value of this must match KBASE_ATOM_ENTER_PROTECTED_CHECK.
+	 */
+	KBASE_ATOM_EXIT_PROTECTED_CHECK = 0,
+	KBASE_ATOM_EXIT_PROTECTED_IDLE_L2,
+	KBASE_ATOM_EXIT_PROTECTED_RESET,
+	KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT,
+};
+
+/**
+ * struct kbase_ext_res - Contains the info for external resources referred
+ *                        by an atom, which have been mapped on GPU side.
+ * @gpu_address:          Start address of the memory region allocated for
+ *                        the resource from GPU virtual address space.
+ * @alloc:                pointer to physical pages tracking object, set on
+ *                        mapping the external resource on GPU side.
+ */
+struct kbase_ext_res {
+	u64 gpu_address;
+	struct kbase_mem_phy_alloc *alloc;
+};
+
+/**
+ * struct kbase_jd_atom  - object representing the atom, containing the complete
+ *                         state and attributes of an atom.
+ * @work:                  work item for the bottom half processing of the atom,
+ *                         by JD or JS, after it got executed on GPU or the input
+ *                         fence got signaled
+ * @start_timestamp:       time at which the atom was submitted to the GPU, by
+ *                         updating the JS_HEAD_NEXTn register.
+ * @udata:                 copy of the user data sent for the atom in base_jd_submit.
+ * @kctx:                  Pointer to the base context with which the atom is associated.
+ * @dep_head:              Array of 2 list heads, pointing to the two list of atoms
+ *                         which are blocked due to dependency on this atom.
+ * @dep_item:              Array of 2 list heads, used to store the atom in the list of
+ *                         other atoms depending on the same dependee atom.
+ * @dep:                   Array containing the dependency info for the 2 atoms on which
+ *                         the atom depends upon.
+ * @jd_item:               List head used during job dispatch job_done processing - as
+ *                         dependencies may not be entirely resolved at this point,
+ *                         we need to use a separate list head.
+ * @in_jd_list:            flag set to true if atom's @jd_item is currently on a list,
+ *                         prevents atom being processed twice.
+ * @nr_extres:             number of external resources referenced by the atom.
+ * @extres:                pointer to the location containing info about @nr_extres
+ *                         external resources referenced by the atom.
+ * @device_nr:             indicates the coregroup with which the atom is associated,
+ *                         when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified.
+ * @jc:                    GPU address of the job-chain.
+ * @softjob_data:          Copy of data read from the user space buffer that @jc
+ *                         points to.
+ * @coreref_state:         state of the atom with respect to retention of shader
+ *                         cores for affinity & power management.
+ * @fence:                 Stores either an input or output sync fence, depending
+ *                         on soft-job type
+ * @sync_waiter:           Pointer to the sync fence waiter structure passed to the
+ *                         callback function on signaling of the input fence.
+ * @dma_fence:             object containing pointers to both input & output fences
+ *                         and other related members used for explicit sync through
+ *                         soft jobs and for the implicit synchronization required
+ *                         on access to external resources.
+ * @event_code:            Event code for the job chain represented by the atom, both
+ *                         HW and low-level SW events are represented by event codes.
+ * @core_req:              bitmask of BASE_JD_REQ_* flags specifying either Hw or Sw
+ *                         requirements for the job chain represented by the atom.
+ * @ticks:                 Number of scheduling ticks for which atom has been running
+ *                         on the GPU.
+ * @sched_priority:        Priority of the atom for Job scheduling, as per the
+ *                         KBASE_JS_ATOM_SCHED_PRIO_*.
+ * @poking:                Indicates whether poking of MMU is ongoing for the atom,
+ *                         as a WA for the issue HW_ISSUE_8316.
+ * @completed:             Wait queue to wait upon for the completion of atom.
+ * @status:                Indicates at high level at what stage the atom is in,
+ *                         as per KBASE_JD_ATOM_STATE_*, that whether it is not in
+ *                         use or its queued in JD or given to JS or submitted to Hw
+ *                         or it completed the execution on Hw.
+ * @work_id:               used for GPU tracepoints, its a snapshot of the 'work_id'
+ *                         counter in kbase_jd_context which is incremented on
+ *                         every call to base_jd_submit.
+ * @slot_nr:               Job slot chosen for the atom.
+ * @atom_flags:            bitmask of KBASE_KATOM_FLAG* flags capturing the exact
+ *                         low level state of the atom.
+ * @retry_count:           Number of times this atom has been retried. Used by replay
+ *                         soft job.
+ * @gpu_rb_state:          bitmnask of KBASE_ATOM_GPU_RB_* flags, precisely tracking
+ *                         atom's state after it has entered Job scheduler on becoming
+ *                         runnable. Atom could be blocked due to cross slot dependency
+ *                         or waiting for the shader cores to become available or
+ *                         waiting for protected mode transitions to complete.
+ * @need_cache_flush_cores_retained: flag indicating that manual flush of GPU
+ *                         cache is needed for the atom and the shader cores used
+ *                         for atom have been kept on.
+ * @blocked:               flag indicating that atom's resubmission to GPU is
+ *                         blocked till the work item is scheduled to return the
+ *                         atom to JS.
+ * @pre_dep:               Pointer to atom that this atom has same-slot dependency on
+ * @post_dep:              Pointer to atom that has same-slot dependency on this atom
+ * @x_pre_dep:             Pointer to atom that this atom has cross-slot dependency on
+ * @x_post_dep:            Pointer to atom that has cross-slot dependency on this atom
+ * @flush_id:              The GPU's flush count recorded at the time of submission,
+ *                         used for the cache flush optimisation
+ * @fault_event:           Info for dumping the debug data on Job fault.
+ * @queue:                 List head used for 4 different purposes :
+ *                         Adds atom to the list of dma-buf fence waiting atoms.
+ *                         Adds atom to the list of atoms blocked due to cross
+ *                         slot dependency.
+ *                         Adds atom to the list of softjob atoms for which JIT
+ *                         allocation has been deferred
+ *                         Adds atom to the list of softjob atoms waiting for the
+ *                         signaling of fence.
+ * @jit_node:              Used to keep track of all JIT free/alloc jobs in submission order
+ * @jit_blocked:           Flag indicating that JIT allocation requested through
+ *                         softjob atom will be reattempted after the impending
+ *                         free of other active JIT allocations.
+ * @will_fail_event_code:  If non-zero, this indicates that the atom will fail
+ *                         with the set event_code when the atom is processed.
+ *                         Used for special handling of atoms, which have a data
+ *                         dependency on the failed atoms.
+ * @protected_state:       State of the atom, as per KBASE_ATOM_(ENTER|EXIT)_PROTECTED_*,
+ *                         when transitioning into or out of protected mode. Atom will
+ *                         be either entering or exiting the protected mode.
+ * @runnable_tree_node:    The node added to context's job slot specific rb tree
+ *                         when the atom becomes runnable.
+ * @age:                   Age of atom relative to other atoms in the context, is
+ *                         snapshot of the age_count counter in kbase context.
+ */
+struct kbase_jd_atom {
+	struct work_struct work;
+	ktime_t start_timestamp;
+
+	struct base_jd_udata udata;
+	struct kbase_context *kctx;
+
+	struct list_head dep_head[2];
+	struct list_head dep_item[2];
+	const struct kbase_jd_atom_dependency dep[2];
+	struct list_head jd_item;
+	bool in_jd_list;
+
+	u16 nr_extres;
+	struct kbase_ext_res *extres;
+
+	u32 device_nr;
+	u64 jc;
+	void *softjob_data;
+	enum kbase_atom_coreref_state coreref_state;
+#if defined(CONFIG_SYNC)
+	struct sync_fence *fence;
+	struct sync_fence_waiter sync_waiter;
+#endif				/* CONFIG_SYNC */
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+	struct {
+		/* Use the functions/API defined in mali_kbase_fence.h to
+		 * when working with this sub struct */
+#if defined(CONFIG_SYNC_FILE)
+		/* Input fence */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+		struct fence *fence_in;
+#else
+		struct dma_fence *fence_in;
+#endif
+#endif
+		/* This points to the dma-buf output fence for this atom. If
+		 * this is NULL then there is no fence for this atom and the
+		 * following fields related to dma_fence may have invalid data.
+		 *
+		 * The context and seqno fields contain the details for this
+		 * fence.
+		 *
+		 * This fence is signaled when the katom is completed,
+		 * regardless of the event_code of the katom (signal also on
+		 * failure).
+		 */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+		struct fence *fence;
+#else
+		struct dma_fence *fence;
+#endif
+		/* The dma-buf fence context number for this atom. A unique
+		 * context number is allocated to each katom in the context on
+		 * context creation.
+		 */
+		unsigned int context;
+		/* The dma-buf fence sequence number for this atom. This is
+		 * increased every time this katom uses dma-buf fence.
+		 */
+		atomic_t seqno;
+		/* This contains a list of all callbacks set up to wait on
+		 * other fences.  This atom must be held back from JS until all
+		 * these callbacks have been called and dep_count have reached
+		 * 0. The initial value of dep_count must be equal to the
+		 * number of callbacks on this list.
+		 *
+		 * This list is protected by jctx.lock. Callbacks are added to
+		 * this list when the atom is built and the wait are set up.
+		 * All the callbacks then stay on the list until all callbacks
+		 * have been called and the atom is queued, or cancelled, and
+		 * then all callbacks are taken off the list and freed.
+		 */
+		struct list_head callbacks;
+		/* Atomic counter of number of outstandind dma-buf fence
+		 * dependencies for this atom. When dep_count reaches 0 the
+		 * atom may be queued.
+		 *
+		 * The special value "-1" may only be set after the count
+		 * reaches 0, while holding jctx.lock. This indicates that the
+		 * atom has been handled, either queued in JS or cancelled.
+		 *
+		 * If anyone but the dma-fence worker sets this to -1 they must
+		 * ensure that any potentially queued worker must have
+		 * completed before allowing the atom to be marked as unused.
+		 * This can be done by flushing the fence work queue:
+		 * kctx->dma_fence.wq.
+		 */
+		atomic_t dep_count;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE*/
+
+	/* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
+	enum base_jd_event_code event_code;
+	base_jd_core_req core_req;
+
+	u32 ticks;
+	int sched_priority;
+
+	int poking;
+
+	wait_queue_head_t completed;
+	enum kbase_jd_atom_state status;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	int work_id;
+#endif
+	int slot_nr;
+
+	u32 atom_flags;
+
+	int retry_count;
+
+	enum kbase_atom_gpu_rb_state gpu_rb_state;
+
+	bool need_cache_flush_cores_retained;
+
+	atomic_t blocked;
+
+	struct kbase_jd_atom *pre_dep;
+	struct kbase_jd_atom *post_dep;
+
+	struct kbase_jd_atom *x_pre_dep;
+	struct kbase_jd_atom *x_post_dep;
+
+	u32 flush_id;
+
+#ifdef CONFIG_DEBUG_FS
+	struct base_job_fault_event fault_event;
+#endif
+
+	struct list_head queue;
+
+	struct list_head jit_node;
+	bool jit_blocked;
+
+	enum base_jd_event_code will_fail_event_code;
+
+	union {
+		enum kbase_atom_enter_protected_state enter;
+		enum kbase_atom_exit_protected_state exit;
+	} protected_state;
+
+	struct rb_node runnable_tree_node;
+
+	u32 age;
+};
+
+/**
+ * struct kbase_debug_copy_buffer - information about the buffer to be copied.
+ *
+ * @size:	size of the buffer in bytes
+ * @pages:	pointer to an array of pointers to the pages which contain
+ *		the buffer
+ * @nr_pages:	number of pages
+ * @offset:	offset into the pages
+ * @gpu_alloc:	pointer to physical memory allocated by the GPU
+ * @extres_pages: array of pointers to the pages containing external resources
+ *		for this buffer
+ * @nr_extres_pages: number of pages in @extres_pages
+ */
+struct kbase_debug_copy_buffer {
+	size_t size;
+	struct page **pages;
+	int nr_pages;
+	size_t offset;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+
+	struct page **extres_pages;
+	int nr_extres_pages;
+};
+
+static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom)
+{
+	return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED);
+}
+
+/*
+ * Theory of operations:
+ *
+ * Atom objects are statically allocated within the context structure.
+ *
+ * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set.
+ */
+
+#define KBASE_JD_DEP_QUEUE_SIZE 256
+
+/**
+ * struct kbase_jd_context  - per context object encapsulating all the Job dispatcher
+ *                            related state.
+ * @lock:                     lock to serialize the updates made to the Job dispatcher
+ *                            state and kbase_jd_atom objects.
+ * @sched_info:               Structure encapsulating all the Job scheduling info.
+ * @atoms:                    Array of the objects representing atoms, containing
+ *                            the complete state and attributes of an atom.
+ * @job_nr:                   Tracks the number of atoms being processed by the
+ *                            kbase. This includes atoms that are not tracked by
+ *                            scheduler: 'not ready to run' & 'dependency-only' jobs.
+ * @zero_jobs_wait:           Waitq that reflects whether there are no jobs
+ *                            (including SW-only dependency jobs). This is set
+ *                            when no jobs are present on the ctx, and clear when
+ *                            there are jobs.
+ *                            This must be updated atomically with @job_nr.
+ *                            note: Job Dispatcher knows about more jobs than the
+ *                            Job Scheduler as it is unaware of jobs that are
+ *                            blocked on dependencies and SW-only dependency jobs.
+ *                            This waitq can be waited upon to find out when the
+ *                            context jobs are all done/cancelled (including those
+ *                            that might've been blocked on dependencies) - and so,
+ *                            whether it can be terminated. However, it should only
+ *                            be terminated once it is not present in the run-pool.
+ *                            Since the waitq is only set under @lock, the waiter
+ *                            should also briefly obtain and drop @lock to guarantee
+ *                            that the setter has completed its work on the kbase_context
+ * @job_done_wq:              Workqueue to which the per atom work item is queued
+ *                            for bottom half processing when the atom completes
+ *                            execution on GPU or the input fence get signaled.
+ * @tb_lock:                  Lock to serialize the write access made to @tb to
+ *                            to store the register access trace messages.
+ * @tb:                       Pointer to the Userspace accessible buffer storing
+ *                            the trace messages for register read/write accesses
+ *                            made by the Kbase. The buffer is filled in circular
+ *                            fashion.
+ * @tb_wrap_offset:           Offset to the end location in the trace buffer, the
+ *                            write pointer is moved to the beginning on reaching
+ *                            this offset.
+ * @work_id:                  atomic variable used for GPU tracepoints, incremented
+ *                            on every call to base_jd_submit.
+ */
+struct kbase_jd_context {
+	struct mutex lock;
+	struct kbasep_js_kctx_info sched_info;
+	struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
+
+	u32 job_nr;
+
+	wait_queue_head_t zero_jobs_wait;
+
+	struct workqueue_struct *job_done_wq;
+
+	spinlock_t tb_lock;
+	u32 *tb;
+	size_t tb_wrap_offset;
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_t work_id;
+#endif
+};
+
+struct kbase_device_info {
+	u32 features;
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+enum {
+	KBASE_AS_POKE_STATE_IN_FLIGHT     = 1<<0,
+	KBASE_AS_POKE_STATE_KILLING_POKE  = 1<<1
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+typedef u32 kbase_as_poke_state;
+
+struct kbase_mmu_setup {
+	u64	transtab;
+	u64	memattr;
+	u64	transcfg;
+};
+
+/**
+ * struct kbase_as   - object representing an address space of GPU.
+ * @number:            Index at which this address space structure is present
+ *                     in an array of address space structures embedded inside the
+ *                     struct kbase_device.
+ * @pf_wq:             Workqueue for processing work items related to Bus fault
+ *                     and Page fault handling.
+ * @work_pagefault:    Work item for the Page fault handling.
+ * @work_busfault:     Work item for the Bus fault handling.
+ * @fault_type:        Type of fault which occured for this address space,
+ *                     regular/unexpected Bus or Page fault.
+ * @protected_mode:    Flag indicating whether the fault occurred in protected
+ *                     mode or not.
+ * @fault_status:      Records the fault status as reported by Hw.
+ * @fault_addr:        Records the faulting address.
+ * @fault_extra_addr:  Records the secondary fault address.
+ * @current_setup:     Stores the MMU configuration for this address space.
+ * @poke_wq:           Workqueue to process the work items queue for poking the
+ *                     MMU as a WA for BASE_HW_ISSUE_8316.
+ * @poke_work:         Work item to do the poking of MMU for this address space.
+ * @poke_refcount:     Refcount for the need of poking MMU. While the refcount is
+ *                     non zero the poking of MMU will continue.
+ *                     Protected by hwaccess_lock.
+ * @poke_state:        State indicating whether poking is in progress or it has
+ *                     been stopped. Protected by hwaccess_lock.
+ * @poke_timer:        Timer used to schedule the poking at regular intervals.
+ */
+struct kbase_as {
+	int number;
+	struct workqueue_struct *pf_wq;
+	struct work_struct work_pagefault;
+	struct work_struct work_busfault;
+	enum kbase_mmu_fault_type fault_type;
+	bool protected_mode;
+	u32 fault_status;
+	u64 fault_addr;
+	u64 fault_extra_addr;
+	struct kbase_mmu_setup current_setup;
+	struct workqueue_struct *poke_wq;
+	struct work_struct poke_work;
+	int poke_refcount;
+	kbase_as_poke_state poke_state;
+	struct hrtimer poke_timer;
+};
+
+/**
+ * struct kbase_mmu_table  - object representing a set of GPU page tables
+ * @mmu_teardown_pages:   Buffer of 4 Pages in size, used to cache the entries
+ *                        of top & intermediate level page tables to avoid
+ *                        repeated calls to kmap_atomic during the MMU teardown.
+ * @mmu_lock:             Lock to serialize the accesses made to multi level GPU
+ *                        page tables
+ * @pgd:                  Physical address of the page allocated for the top
+ *                        level page table of the context, this is used for
+ *                        MMU HW programming as the address translation will
+ *                        start from the top level page table.
+ * @kctx:                 If this set of MMU tables belongs to a context then
+ *                        this is a back-reference to the context, otherwise
+ *                        it is NULL
+ */
+struct kbase_mmu_table {
+	u64 *mmu_teardown_pages;
+	struct mutex mmu_lock;
+	phys_addr_t pgd;
+	struct kbase_context *kctx;
+};
+
+static inline int kbase_as_has_bus_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
+}
+
+static inline int kbase_as_has_page_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE;
+}
+
+struct kbasep_mem_device {
+	atomic_t used_pages;   /* Tracks usage of OS shared memory. Updated
+				   when OS memory is allocated/freed. */
+
+};
+
+#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X
+
+enum kbase_trace_code {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ENUM */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X)
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+	/* Comma on its own, to extend the list */
+	,
+	/* Must be the last in the enum */
+	KBASE_TRACE_CODE_COUNT
+};
+
+#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
+#define KBASE_TRACE_FLAG_JOBSLOT  (((u8)1) << 1)
+
+/**
+ * struct kbase_trace - object representing a trace message added to trace buffer
+ *                      kbase_device::trace_rbuf
+ * @timestamp:          CPU timestamp at which the trace message was added.
+ * @thread_id:          id of the thread in the context of which trace message
+ *                      was added.
+ * @cpu:                indicates which CPU the @thread_id was scheduled on when
+ *                      the trace message was added.
+ * @ctx:                Pointer to the kbase context for which the trace message
+ *                      was added. Will be NULL for certain trace messages like
+ *                      for traces added corresponding to power management events.
+ *                      Will point to the appropriate context corresponding to
+ *                      job-slot & context's reference count related events.
+ * @katom:              indicates if the trace message has atom related info.
+ * @atom_number:        id of the atom for which trace message was added.
+ *                      Only valid if @katom is true.
+ * @atom_udata:         Copy of the user data sent for the atom in base_jd_submit.
+ *                      Only valid if @katom is true.
+ * @gpu_addr:           GPU address of the job-chain represented by atom. Could
+ *                      be valid even if @katom is false.
+ * @info_val:           value specific to the type of event being traced. For the
+ *                      case where @katom is true, will be set to atom's affinity,
+ *                      i.e. bitmask of shader cores chosen for atom's execution.
+ * @code:               Identifies the event, refer enum kbase_trace_code.
+ * @jobslot:            job-slot for which trace message was added, valid only for
+ *                      job-slot management events.
+ * @refcount:           reference count for the context, valid for certain events
+ *                      related to scheduler core and policy.
+ * @flags:              indicates if info related to @jobslot & @refcount is present
+ *                      in the trace message, used during dumping of the message.
+ */
+struct kbase_trace {
+	struct timespec timestamp;
+	u32 thread_id;
+	u32 cpu;
+	void *ctx;
+	bool katom;
+	int atom_number;
+	u64 atom_udata[2];
+	u64 gpu_addr;
+	unsigned long info_val;
+	u8 code;
+	u8 jobslot;
+	u8 refcount;
+	u8 flags;
+};
+
+struct kbasep_kctx_list_element {
+	struct list_head link;
+	struct kbase_context *kctx;
+};
+
+/**
+ * Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ */
+struct kbase_pm_device_data {
+	/**
+	 * The lock protecting Power Management structures accessed outside of
+	 * IRQ.
+	 *
+	 * This lock must also be held whenever the GPU is being powered on or
+	 * off.
+	 */
+	struct mutex lock;
+
+	/**
+	 * The reference count of active contexts on this device. Note that
+	 * some code paths keep shaders/the tiler powered whilst this is 0. Use
+	 * kbase_pm_is_active() instead to check for such cases.
+	 */
+	int active_count;
+	/** Flag indicating suspending/suspended */
+	bool suspending;
+	/* Wait queue set when active_count == 0 */
+	wait_queue_head_t zero_active_count_wait;
+
+	/**
+	 * Bit masks identifying the available shader cores that are specified
+	 * via sysfs. One mask per job slot.
+	 */
+	u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
+	u64 debug_core_mask_all;
+
+	/**
+	 * Callback for initializing the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 *
+	 * @return 0 on success, else error code
+	 */
+	 int (*callback_power_runtime_init)(struct kbase_device *kbdev);
+
+	/**
+	 * Callback for terminating the runtime power management.
+	 *
+	 * @param kbdev The kbase device
+	 */
+	void (*callback_power_runtime_term)(struct kbase_device *kbdev);
+
+	/* Time in milliseconds between each dvfs sample */
+	u32 dvfs_period;
+
+	/* Period of GPU poweroff timer */
+	ktime_t gpu_poweroff_time;
+
+	/* Number of ticks of GPU poweroff timer before shader is powered off */
+	int poweroff_shader_ticks;
+
+	/* Number of ticks of GPU poweroff timer before GPU is powered off */
+	int poweroff_gpu_ticks;
+
+	struct kbase_pm_backend_data backend;
+};
+
+/**
+ * struct kbase_mem_pool - Page based memory pool for kctx/kbdev
+ * @kbdev:        Kbase device where memory is used
+ * @cur_size:     Number of free pages currently in the pool (may exceed
+ *                @max_size in some corner cases)
+ * @max_size:     Maximum number of free pages in the pool
+ * @order:        order = 0 refers to a pool of 4 KB pages
+ *                order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB)
+ * @pool_lock:    Lock protecting the pool - must be held when modifying
+ *                @cur_size and @page_list
+ * @page_list:    List of free pages in the pool
+ * @reclaim:      Shrinker for kernel reclaim of free pages
+ * @next_pool:    Pointer to next pool where pages can be allocated when this
+ *                pool is empty. Pages will spill over to the next pool when
+ *                this pool is full. Can be NULL if there is no next pool.
+ * @dying:        true if the pool is being terminated, and any ongoing
+ *                operations should be abandoned
+ * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from
+ *                this pool, eg during a grow operation
+ */
+struct kbase_mem_pool {
+	struct kbase_device *kbdev;
+	size_t              cur_size;
+	size_t              max_size;
+	size_t		    order;
+	spinlock_t          pool_lock;
+	struct list_head    page_list;
+	struct shrinker     reclaim;
+
+	struct kbase_mem_pool *next_pool;
+
+	bool dying;
+	bool dont_reclaim;
+};
+
+/**
+ * struct kbase_devfreq_opp - Lookup table for converting between nominal OPP
+ *                            frequency, and real frequency and core mask
+ * @opp_freq:  Nominal OPP frequency
+ * @real_freq: Real GPU frequency
+ * @core_mask: Shader core mask
+ */
+struct kbase_devfreq_opp {
+	u64 opp_freq;
+	u64 real_freq;
+	u64 core_mask;
+};
+
+/* MMU mode flags */
+#define KBASE_MMU_MODE_HAS_NON_CACHEABLE (1ul << 0) /* Has NON_CACHEABLE MEMATTR */
+
+/**
+ * struct kbase_mmu_mode - object containing pointer to methods invoked for
+ *                         programming the MMU, as per the MMU mode supported
+ *                         by Hw.
+ * @update:           enable & setup/configure one of the GPU address space.
+ * @get_as_setup:     retrieve the configuration of one of the GPU address space.
+ * @disable_as:       disable one of the GPU address space.
+ * @pte_to_phy_addr:  retrieve the physical address encoded in the page table entry.
+ * @ate_is_valid:     check if the pte is a valid address translation entry
+ *                    encoding the physical address of the actual mapped page.
+ * @pte_is_valid:     check if the pte is a valid entry encoding the physical
+ *                    address of the next lower level page table.
+ * @entry_set_ate:    program the pte to be a valid address translation entry to
+ *                    encode the physical address of the actual page being mapped.
+ * @entry_set_pte:    program the pte to be a valid entry to encode the physical
+ *                    address of the next lower level page table.
+ * @entry_invalidate: clear out or invalidate the pte.
+ * @flags:            bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants.
+ */
+struct kbase_mmu_mode {
+	void (*update)(struct kbase_device *kbdev,
+			struct kbase_mmu_table *mmut,
+			int as_nr);
+	void (*get_as_setup)(struct kbase_mmu_table *mmut,
+			struct kbase_mmu_setup * const setup);
+	void (*disable_as)(struct kbase_device *kbdev, int as_nr);
+	phys_addr_t (*pte_to_phy_addr)(u64 entry);
+	int (*ate_is_valid)(u64 ate, unsigned int level);
+	int (*pte_is_valid)(u64 pte, unsigned int level);
+	void (*entry_set_ate)(u64 *entry, struct tagged_addr phy,
+			unsigned long flags, unsigned int level);
+	void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
+	void (*entry_invalidate)(u64 *entry);
+	unsigned long flags;
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void);
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
+
+
+#define DEVNAME_SIZE	16
+
+
+/**
+ * struct kbase_device   - Object representing an instance of GPU platform device,
+ *                         allocated from the probe method of mali driver.
+ * @hw_quirks_sc:          Configuration to be used for the shader cores as per
+ *                         the HW issues present in the GPU.
+ * @hw_quirks_tiler:       Configuration to be used for the Tiler as per the HW
+ *                         issues present in the GPU.
+ * @hw_quirks_mmu:         Configuration to be used for the MMU as per the HW
+ *                         issues present in the GPU.
+ * @hw_quirks_jm:          Configuration to be used for the Job Manager as per
+ *                         the HW issues present in the GPU.
+ * @entry:                 Links the device instance to the global list of GPU
+ *                         devices. The list would have as many entries as there
+ *                         are GPU device instances.
+ * @dev:                   Pointer to the kernel's generic/base representation
+ *                         of the GPU platform device.
+ * @mdev:                  Pointer to the miscellaneous device registered to
+ *                         provide Userspace access to kernel driver through the
+ *                         device file /dev/malixx.
+ * @reg_start:             Base address of the region in physical address space
+ *                         where GPU registers have been mapped.
+ * @reg_size:              Size of the region containing GPU registers
+ * @reg:                   Kernel virtual address of the region containing GPU
+ *                         registers, using which Driver will access the registers.
+ * @irqs:                  Array containing IRQ resource info for 3 types of
+ *                         interrupts : Job scheduling, MMU & GPU events (like
+ *                         power management, cache etc.)
+ * @clock:                 Pointer to the input clock resource (having an id of 0),
+ *                         referenced by the GPU device node.
+ * @regulator:             Pointer to the struct corresponding to the regulator
+ *                         for GPU device
+ * @devname:               string containing the name used for GPU device instance,
+ *                         miscellaneous device is registered using the same name.
+ * @model:                 Pointer, valid only when Driver is compiled to not access
+ *                         the real GPU Hw, to the dummy model which tries to mimic
+ *                         to some extent the state & behavior of GPU Hw in response
+ *                         to the register accesses made by the Driver.
+ * @irq_slab:              slab cache for allocating the work items queued when
+ *                         model mimics raising of IRQ to cause an interrupt on CPU.
+ * @irq_workq:             workqueue for processing the irq work items.
+ * @serving_job_irq:       function to execute work items queued when model mimics
+ *                         the raising of JS irq, mimics the interrupt handler
+ *                         processing JS interrupts.
+ * @serving_gpu_irq:       function to execute work items queued when model mimics
+ *                         the raising of GPU irq, mimics the interrupt handler
+ *                         processing GPU interrupts.
+ * @serving_mmu_irq:       function to execute work items queued when model mimics
+ *                         the raising of MMU irq, mimics the interrupt handler
+ *                         processing MMU interrupts.
+ * @reg_op_lock:           lock used by model to serialize the handling of register
+ *                         accesses made by the driver.
+ * @pm:                    Per device object for storing data for power management
+ *                         framework.
+ * @js_data:               Per device object encapsulating the current context of
+ *                         Job Scheduler, which is global to the device and is not
+ *                         tied to any particular struct kbase_context running on
+ *                         the device
+ * @mem_pool:              Object containing the state for global pool of 4KB size
+ *                         physical pages which can be used by all the contexts.
+ * @lp_mem_pool:           Object containing the state for global pool of 2MB size
+ *                         physical pages which can be used by all the contexts.
+ * @memdev:                keeps track of the in use physical pages allocated by
+ *                         the Driver.
+ * @mmu_mode:              Pointer to the object containing methods for programming
+ *                         the MMU, depending on the type of MMU supported by Hw.
+ * @as:                    Array of objects representing address spaces of GPU.
+ * @as_free:               Bitpattern of free/available GPU address spaces.
+ * @as_to_kctx:            Array of pointers to struct kbase_context, having
+ *                         GPU adrress spaces assigned to them.
+ * @mmu_mask_change:       Lock to serialize the access to MMU interrupt mask
+ *                         register used in the handling of Bus & Page faults.
+ * @gpu_props:             Object containing complete information about the
+ *                         configuration/properties of GPU HW device in use.
+ * @hw_issues_mask:        List of SW workarounds for HW issues
+ * @hw_features_mask:      List of available HW features.
+ * @shader_needed_cnt:     Count for the 64 shader cores, incremented when
+ *                         shaders are requested for use and decremented later
+ *                         when they are no longer required.
+ * @tiler_needed_cnt:      Count for the Tiler block shader cores, incremented
+ *                         when Tiler is requested for use and decremented
+ *                         later when the Tiler is no longer required.
+ * @disjoint_event:        struct for keeping track of the disjoint information,
+ *                         that whether the GPU is in a disjoint state and the
+ *                         number of disjoint events that have occurred on GPU.
+ * @l2_users_count:        Refcount for tracking users of the l2 cache, e.g.
+ *                         when using hardware counter instrumentation.
+ * @shader_available_bitmap: Bitmap of shader cores that are currently available,
+ *                         powered up and the power policy is happy for jobs
+ *                         to be submitted to these cores. These are updated
+ *                         by the power management code. The job scheduler
+ *                         should avoid submitting new jobs to any cores
+ *                         that are not marked as available.
+ * @tiler_available_bitmap: Bitmap of tiler units that are currently available.
+ * @l2_available_bitmap:    Bitmap of the currently available Level 2 caches.
+ * @stack_available_bitmap: Bitmap of the currently available Core stacks.
+ * @shader_ready_bitmap:    Bitmap of shader cores that are ready (powered on)
+ * @shader_transitioning_bitmap: Bitmap of shader cores that are currently changing
+ *                         power state.
+ * @nr_hw_address_spaces:  Number of address spaces actually available in the
+ *                         GPU, remains constant after driver initialisation.
+ * @nr_user_address_spaces: Number of address spaces available to user contexts
+ * @hwcnt:                  Structure used for instrumentation and HW counters
+ *                         dumping
+ * @vinstr_ctx:            vinstr context created per device
+ * @trace_lock:            Lock to serialize the access to trace buffer.
+ * @trace_first_out:       Index/offset in the trace buffer at which the first
+ *                         unread message is present.
+ * @trace_next_in:         Index/offset in the trace buffer at which the new
+ *                         message will be written.
+ * @trace_rbuf:            Pointer to the buffer storing debug messages/prints
+ *                         tracing the various events in Driver.
+ *                         The buffer is filled in circular fashion.
+ * @reset_timeout_ms:      Number of milliseconds to wait for the soft stop to
+ *                         complete for the GPU jobs before proceeding with the
+ *                         GPU reset.
+ * @cacheclean_lock:       Lock to serialize the clean & invalidation of GPU caches,
+ *                         between Job Manager backend & Instrumentation code.
+ * @platform_context:      Platform specific private data to be accessed by
+ *                         platform specific config files only.
+ * @kctx_list:             List of kbase_contexts created for the device, including
+ *                         the kbase_context created for vinstr_ctx.
+ * @kctx_list_lock:        Lock protecting concurrent accesses to @kctx_list.
+ * @devfreq_profile:       Describes devfreq profile for the Mali GPU device, passed
+ *                         to devfreq_add_device() to add devfreq feature to Mali
+ *                         GPU device.
+ * @devfreq:               Pointer to devfreq structure for Mali GPU device,
+ *                         returned on the call to devfreq_add_device().
+ * @current_freq:          The real frequency, corresponding to @current_nominal_freq,
+ *                         at which the Mali GPU device is currently operating, as
+ *                         retrieved from @opp_table in the target callback of
+ *                         @devfreq_profile.
+ * @current_nominal_freq:  The nominal frequency currently used for the Mali GPU
+ *                         device as retrieved through devfreq_recommended_opp()
+ *                         using the freq value passed as an argument to target
+ *                         callback of @devfreq_profile
+ * @current_voltage:       The voltage corresponding to @current_nominal_freq, as
+ *                         retrieved through dev_pm_opp_get_voltage().
+ * @current_core_mask:     bitmask of shader cores that are currently desired &
+ *                         enabled, corresponding to @current_nominal_freq as
+ *                         retrieved from @opp_table in the target callback of
+ *                         @devfreq_profile.
+ * @opp_table:             Pointer to the lookup table for converting between nominal
+ *                         OPP (operating performance point) frequency, and real
+ *                         frequency and core mask. This table is constructed according
+ *                         to operating-points-v2-mali table in devicetree.
+ * @num_opps:              Number of operating performance points available for the Mali
+ *                         GPU device.
+ * @devfreq_cooling:       Pointer returned on registering devfreq cooling device
+ *                         corresponding to @devfreq.
+ * @ipa_protection_mode_switched: is set to TRUE when GPU is put into protected
+ *                         mode. It is a sticky flag which is cleared by IPA
+ *                         once it has made use of information that GPU had
+ *                         previously entered protected mode.
+ * @ipa:                   Top level structure for IPA, containing pointers to both
+ *                         configured & fallback models.
+ * @timeline:              Stores the global timeline tracking information.
+ * @job_fault_debug:       Flag to control the dumping of debug data for job faults,
+ *                         set when the 'job_fault' debugfs file is opened.
+ * @mali_debugfs_directory: Root directory for the debugfs files created by the driver
+ * @debugfs_ctx_directory: Directory inside the @mali_debugfs_directory containing
+ *                         a sub-directory for every context.
+ * @debugfs_as_read_bitmap: bitmap of address spaces for which the bus or page fault
+ *                         has occurred.
+ * @job_fault_wq:          Waitqueue to block the job fault dumping daemon till the
+ *                         occurrence of a job fault.
+ * @job_fault_resume_wq:   Waitqueue on which every context with a faulty job wait
+ *                         for the job fault dumping to complete before they can
+ *                         do bottom half of job done for the atoms which followed
+ *                         the faulty atom.
+ * @job_fault_resume_workq: workqueue to process the work items queued for the faulty
+ *                         atoms, whereby the work item function waits for the dumping
+ *                         to get completed.
+ * @job_fault_event_list:  List of atoms, each belonging to a different context, which
+ *                         generated a job fault.
+ * @job_fault_event_lock:  Lock to protect concurrent accesses to @job_fault_event_list
+ * @regs_dump_debugfs_data: Contains the offset of register to be read through debugfs
+ *                         file "read_register".
+ * @kbase_profiling_controls: Profiling controls set by gator to control frame buffer
+ *                         dumping and s/w counter reporting.
+ * @force_replay_limit:    Number of gpu jobs, having replay atoms associated with them,
+ *                         that are run before a job is forced to fail and replay.
+ *                         Set to 0 to disable forced failures.
+ * @force_replay_count:    Count of gpu jobs, having replay atoms associated with them,
+ *                         between forced failures. Incremented on each gpu job which
+ *                         has replay atoms dependent on it. A gpu job is forced to
+ *                         fail once this is greater than or equal to @force_replay_limit
+ * @force_replay_core_req: Core requirements, set through the sysfs file, for the replay
+ *                         job atoms to consider the associated gpu job for forceful
+ *                         failure and replay. May be zero
+ * @force_replay_random:   Set to 1 to randomize the @force_replay_limit, in the
+ *                         range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
+ * @ctx_num:               Total number of contexts created for the device.
+ * @io_history:            Pointer to an object keeping a track of all recent
+ *                         register accesses. The history of register accesses
+ *                         can be read through "regs_history" debugfs file.
+ * @hwaccess:              Contains a pointer to active kbase context and GPU
+ *                         backend specific data for HW access layer.
+ * @faults_pending:        Count of page/bus faults waiting for bottom half processing
+ *                         via workqueues.
+ * @poweroff_pending:      Set when power off operation for GPU is started, reset when
+ *                         power on for GPU is started.
+ * @infinite_cache_active_default: Set to enable using infinite cache for all the
+ *                         allocations of a new context.
+ * @mem_pool_max_size_default: Initial/default value for the maximum size of both
+ *                         types of pool created for a new context.
+ * @current_gpu_coherency_mode: coherency mode in use, which can be different
+ *                         from @system_coherency, when using protected mode.
+ * @system_coherency:      coherency mode as retrieved from the device tree.
+ * @cci_snoop_enabled:     Flag to track when CCI snoops have been enabled.
+ * @snoop_enable_smc:      SMC function ID to call into Trusted firmware to
+ *                         enable cache snooping. Value of 0 indicates that it
+ *                         is not used.
+ * @snoop_disable_smc:     SMC function ID to call disable cache snooping.
+ * @protected_ops:         Pointer to the methods for switching in or out of the
+ *                         protected mode, as per the @protected_dev being used.
+ * @protected_dev:         Pointer to the protected mode switcher device attached
+ *                         to the GPU device retrieved through device tree if
+ *                         GPU do not support protected mode switching natively.
+ * @protected_mode:        set to TRUE when GPU is put into protected mode
+ * @protected_mode_transition: set to TRUE when GPU is transitioning into or
+ *                         out of protected mode.
+ * @protected_mode_support: set to true if protected mode is supported.
+ * @buslogger:              Pointer to the structure required for interfacing
+ *                          with the bus logger module to set the size of buffer
+ *                          used by the module for capturing bus logs.
+ * @irq_reset_flush:        Flag to indicate that GPU reset is in-flight and flush of
+ *                          IRQ + bottom half is being done, to prevent the writes
+ *                          to MMU_IRQ_CLEAR & MMU_IRQ_MASK registers.
+ * @inited_subsys:          Bitmap of inited sub systems at the time of device probe.
+ *                          Used during device remove or for handling error in probe.
+ * @hwaccess_lock:          Lock, which can be taken from IRQ context, to serialize
+ *                          the updates made to Job dispatcher + scheduler states.
+ * @mmu_hw_mutex:           Protects access to MMU operations and address space
+ *                          related state.
+ * @serialize_jobs:         Currently used mode for serialization of jobs, both
+ *                          intra & inter slots serialization is supported.
+ * @backup_serialize_jobs:  Copy of the original value of @serialize_jobs taken
+ *                          when GWT is enabled. Used to restore the original value
+ *                          on disabling of GWT.
+ * @js_ctx_scheduling_mode: Context scheduling mode currently being used by
+ *                          Job Scheduler
+ */
+struct kbase_device {
+	u32 hw_quirks_sc;
+	u32 hw_quirks_tiler;
+	u32 hw_quirks_mmu;
+	u32 hw_quirks_jm;
+
+	struct list_head entry;
+	struct device *dev;
+	struct miscdevice mdev;
+	u64 reg_start;
+	size_t reg_size;
+	void __iomem *reg;
+
+	struct {
+		int irq;
+		int flags;
+	} irqs[3];
+
+	struct clk *clock;
+#ifdef CONFIG_REGULATOR
+	struct regulator *regulator;
+#endif
+	char devname[DEVNAME_SIZE];
+
+#ifdef CONFIG_MALI_NO_MALI
+	void *model;
+	struct kmem_cache *irq_slab;
+	struct workqueue_struct *irq_workq;
+	atomic_t serving_job_irq;
+	atomic_t serving_gpu_irq;
+	atomic_t serving_mmu_irq;
+	spinlock_t reg_op_lock;
+#endif	/* CONFIG_MALI_NO_MALI */
+
+	struct kbase_pm_device_data pm;
+	struct kbasep_js_device_data js_data;
+	struct kbase_mem_pool mem_pool;
+	struct kbase_mem_pool lp_mem_pool;
+	struct kbasep_mem_device memdev;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	struct kbase_as as[BASE_MAX_NR_AS];
+	u16 as_free; /* Bitpattern of free Address Spaces */
+	struct kbase_context *as_to_kctx[BASE_MAX_NR_AS];
+
+	spinlock_t mmu_mask_change;
+
+	struct kbase_gpu_props gpu_props;
+
+	unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+	unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+
+	u32 tiler_needed_cnt;
+	u32 shader_needed_cnt;
+
+	struct {
+		atomic_t count;
+		atomic_t state;
+	} disjoint_event;
+
+	u32 l2_users_count;
+
+	u64 shader_available_bitmap;
+	u64 tiler_available_bitmap;
+	u64 l2_available_bitmap;
+	u64 stack_available_bitmap;
+
+	u64 shader_ready_bitmap;
+	u64 shader_transitioning_bitmap;
+
+	s8 nr_hw_address_spaces;
+	s8 nr_user_address_spaces;
+
+	struct kbase_hwcnt {
+		/* The lock should be used when accessing any of the following members */
+		spinlock_t lock;
+
+		struct kbase_context *kctx;
+		u64 addr;
+
+		struct kbase_instr_backend backend;
+	} hwcnt;
+
+	struct kbase_vinstr_context *vinstr_ctx;
+
+#if KBASE_TRACE_ENABLE
+	spinlock_t              trace_lock;
+	u16                     trace_first_out;
+	u16                     trace_next_in;
+	struct kbase_trace            *trace_rbuf;
+#endif
+
+	u32 reset_timeout_ms;
+
+	struct mutex cacheclean_lock;
+
+	void *platform_context;
+
+	struct list_head        kctx_list;
+	struct mutex            kctx_list_lock;
+
+#ifdef CONFIG_MALI_DEVFREQ
+	struct devfreq_dev_profile devfreq_profile;
+	struct devfreq *devfreq;
+	unsigned long current_freq;
+	unsigned long current_nominal_freq;
+	unsigned long current_voltage;
+	u64 current_core_mask;
+	struct kbase_devfreq_opp *opp_table;
+	int num_opps;
+	struct kbasep_pm_metrics last_devfreq_metrics;
+#ifdef CONFIG_DEVFREQ_THERMAL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+	struct devfreq_cooling_device *devfreq_cooling;
+#else
+	struct thermal_cooling_device *devfreq_cooling;
+#endif
+	bool ipa_protection_mode_switched;
+	struct {
+		/* Access to this struct must be with ipa.lock held */
+		struct mutex lock;
+		struct kbase_ipa_model *configured_model;
+		struct kbase_ipa_model *fallback_model;
+
+		/* Values of the PM utilization metrics from last time the
+		 * power model was invoked. The utilization is calculated as
+		 * the difference between last_metrics and the current values.
+		 */
+		struct kbasep_pm_metrics last_metrics;
+
+		/*
+		 * gpu_active_callback - Inform IPA that GPU is now active
+		 * @model_data: Pointer to model data
+		 */
+		void (*gpu_active_callback)(
+				struct kbase_ipa_model_vinstr_data *model_data);
+
+		/*
+		 * gpu_idle_callback - Inform IPA that GPU is now idle
+		 * @model_data: Pointer to model data
+		 */
+		void (*gpu_idle_callback)(
+				struct kbase_ipa_model_vinstr_data *model_data);
+
+		/* Model data to pass to ipa_gpu_active/idle() */
+		struct kbase_ipa_model_vinstr_data *model_data;
+
+		/* true if IPA is currently using vinstr */
+		bool vinstr_active;
+	} ipa;
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* CONFIG_MALI_DEVFREQ */
+
+	bool job_fault_debug;
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *mali_debugfs_directory;
+	struct dentry *debugfs_ctx_directory;
+
+#ifdef CONFIG_MALI_DEBUG
+	u64 debugfs_as_read_bitmap;
+#endif /* CONFIG_MALI_DEBUG */
+
+	wait_queue_head_t job_fault_wq;
+	wait_queue_head_t job_fault_resume_wq;
+	struct workqueue_struct *job_fault_resume_workq;
+	struct list_head job_fault_event_list;
+	spinlock_t job_fault_event_lock;
+
+#if !MALI_CUSTOMER_RELEASE
+	struct {
+		u16 reg_offset;
+	} regs_dump_debugfs_data;
+#endif /* !MALI_CUSTOMER_RELEASE */
+#endif /* CONFIG_DEBUG_FS */
+
+	u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX];
+
+
+#if MALI_CUSTOMER_RELEASE == 0
+	int force_replay_limit;
+	int force_replay_count;
+	base_jd_core_req force_replay_core_req;
+	bool force_replay_random;
+#endif
+
+	atomic_t ctx_num;
+
+#ifdef CONFIG_DEBUG_FS
+	struct kbase_io_history io_history;
+#endif /* CONFIG_DEBUG_FS */
+
+	struct kbase_hwaccess_data hwaccess;
+
+	atomic_t faults_pending;
+
+	bool poweroff_pending;
+
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool infinite_cache_active_default;
+#else
+	u32 infinite_cache_active_default;
+#endif
+	size_t mem_pool_max_size_default;
+
+	u32 current_gpu_coherency_mode;
+	u32 system_coherency;
+
+	bool cci_snoop_enabled;
+
+	u32 snoop_enable_smc;
+	u32 snoop_disable_smc;
+
+	struct protected_mode_ops *protected_ops;
+
+	struct protected_mode_device *protected_dev;
+
+	bool protected_mode;
+
+	bool protected_mode_transition;
+
+	bool protected_mode_support;
+
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	struct bus_logger_client *buslogger;
+#endif
+
+	bool irq_reset_flush;
+
+	u32 inited_subsys;
+
+	spinlock_t hwaccess_lock;
+
+	struct mutex mmu_hw_mutex;
+
+	/* See KBASE_SERIALIZE_* for details */
+	u8 serialize_jobs;
+
+#ifdef CONFIG_MALI_JOB_DUMP
+	u8 backup_serialize_jobs;
+#endif
+
+	/* See KBASE_JS_*_PRIORITY_MODE for details. */
+	u32 js_ctx_scheduling_mode;
+
+};
+
+/**
+ * struct jsctx_queue - JS context atom queue
+ * @runnable_tree: Root of RB-tree containing currently runnable atoms on this
+ *                 job slot.
+ * @x_dep_head:    Head item of the linked list of atoms blocked on cross-slot
+ *                 dependencies. Atoms on this list will be moved to the
+ *                 runnable_tree when the blocking atom completes.
+ *
+ * hwaccess_lock must be held when accessing this structure.
+ */
+struct jsctx_queue {
+	struct rb_root runnable_tree;
+	struct list_head x_dep_head;
+};
+
+
+#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20)  | \
+					 (((minor) & 0xFFF) << 8) | \
+					 ((0 & 0xFF) << 0))
+
+/**
+ * enum kbase_context_flags - Flags for kbase contexts
+ *
+ * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit
+ * process on a 64-bit kernel.
+ *
+ * @KCTX_RUNNABLE_REF: Set when context is counted in
+ * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing.
+ *
+ * @KCTX_ACTIVE: Set when the context is active.
+ *
+ * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this
+ * context.
+ *
+ * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been
+ * initialized.
+ *
+ * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new
+ * allocations. Existing allocations will not change.
+ *
+ * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs.
+ *
+ * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept
+ * scheduled in.
+ *
+ * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool.
+ * This is only ever updated whilst the jsctx_mutex is held.
+ *
+ * @KCTX_DYING: Set when the context process is in the process of being evicted.
+ *
+ * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this
+ * context, to disable use of implicit dma-buf fences. This is used to avoid
+ * potential synchronization deadlocks.
+ *
+ * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory
+ * allocations. For 64-bit clients it is enabled by default, and disabled by
+ * default on 32-bit clients. Being able to clear this flag is only used for
+ * testing purposes of the custom zone allocation on 64-bit user-space builds,
+ * where we also require more control than is available through e.g. the JIT
+ * allocation mechanism. However, the 64-bit user-space client must still
+ * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT
+ *
+ * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled
+ * from it for job slot 0. This is reset when the context first goes active or
+ * is re-activated on that slot.
+ *
+ * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled
+ * from it for job slot 1. This is reset when the context first goes active or
+ * is re-activated on that slot.
+ *
+ * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled
+ * from it for job slot 2. This is reset when the context first goes active or
+ * is re-activated on that slot.
+ *
+ * All members need to be separate bits. This enum is intended for use in a
+ * bitmask where multiple values get OR-ed together.
+ */
+enum kbase_context_flags {
+	KCTX_COMPAT = 1U << 0,
+	KCTX_RUNNABLE_REF = 1U << 1,
+	KCTX_ACTIVE = 1U << 2,
+	KCTX_PULLED = 1U << 3,
+	KCTX_MEM_PROFILE_INITIALIZED = 1U << 4,
+	KCTX_INFINITE_CACHE = 1U << 5,
+	KCTX_SUBMIT_DISABLED = 1U << 6,
+	KCTX_PRIVILEGED = 1U << 7,
+	KCTX_SCHEDULED = 1U << 8,
+	KCTX_DYING = 1U << 9,
+	KCTX_NO_IMPLICIT_SYNC = 1U << 10,
+	KCTX_FORCE_SAME_VA = 1U << 11,
+	KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12,
+	KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
+	KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14,
+};
+
+struct kbase_sub_alloc {
+	struct list_head link;
+	struct page *page;
+	DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K);
+};
+
+
+/**
+ * struct kbase_context - Object representing an entity, among which GPU is
+ *                        scheduled and gets its own GPU address space.
+ *                        Created when the device file /dev/malixx is opened.
+ * @filp:                 Pointer to the struct file corresponding to device file
+ *                        /dev/malixx instance, passed to the file's open method.
+ * @kbdev:                Pointer to the Kbase device for which the context is created.
+ * @mmu:                  Structure holding details of the MMU tables for this
+ *                        context
+ * @id:                   Unique indentifier for the context, indicates the number of
+ *                        contexts which have been created for the device so far.
+ * @api_version:          contains the version number for User/kernel interface,
+ *                        used for compatibility check.
+ * @event_list:           list of posted events about completed atoms, to be sent to
+ *                        event handling thread of Userpsace.
+ * @event_coalesce_list:  list containing events corresponding to successive atoms
+ *                        which have requested deferred delivery of the completion
+ *                        events to Userspace.
+ * @event_mutex:          Lock to protect the concurrent access to @event_list &
+ *                        @event_mutex.
+ * @event_closed:         Flag set through POST_TERM ioctl, indicates that Driver
+ *                        should stop posting events and also inform event handling
+ *                        thread that context termination is in progress.
+ * @event_workq:          Workqueue for processing work items corresponding to atoms
+ *                        that do not return an event to Userspace or have to perform
+ *                        a replay job
+ * @event_count:          Count of the posted events to be consumed by Userspace.
+ * @event_coalesce_count: Count of the events present in @event_coalesce_list.
+ * @flags:                bitmap of enums from kbase_context_flags, indicating the
+ *                        state & attributes for the context.
+ * @setup_complete:       Indicates if the setup for context has completed, i.e.
+ *                        flags have been set for the context. Driver allows only
+ *                        2 ioctls until the setup is done. Valid only for
+ *                        @api_version value 0.
+ * @setup_in_progress:    Indicates if the context's setup is in progress and other
+ *                        setup calls during that shall be rejected.
+ * @aliasing_sink_page:   Special page used for KBASE_MEM_TYPE_ALIAS allocations,
+ *                        which can alias number of memory regions. The page is
+ *                        represent a region where it is mapped with a write-alloc
+ *                        cache setup, typically used when the write result of the
+ *                        GPU isn't needed, but the GPU must write anyway.
+ * @mem_partials_lock:    Lock for protecting the operations done on the elements
+ *                        added to @mem_partials list.
+ * @mem_partials:         List head for the list of large pages, 2MB in size, which
+ *                        which have been split into 4 KB pages and are used
+ *                        partially for the allocations >= 2 MB in size.
+ * @reg_lock:             Lock used for GPU virtual address space management operations,
+ *                        like adding/freeing a memory region in the address space.
+ *                        Can be converted to a rwlock ?.
+ * @reg_rbtree_same:      RB tree of the memory regions allocated from the SAME_VA
+ *                        zone of the GPU virtual address space. Used for allocations
+ *                        having the same value for GPU & CPU virtual address.
+ * @reg_rbtree_custom:    RB tree of the memory regions allocated from the CUSTOM_VA
+ *                        zone of the GPU virtual address space.
+ * @cookies:              Bitmask containing of BITS_PER_LONG bits, used mainly for
+ *                        SAME_VA allocations to defer the reservation of memory region
+ *                        (from the GPU virtual address space) from base_mem_alloc
+ *                        ioctl to mmap system call. This helps returning unique
+ *                        handles, disguised as GPU VA, to Userspace from base_mem_alloc
+ *                        and later retrieving the pointer to memory region structure
+ *                        in the mmap handler.
+ * @pending_regions:      Array containing pointers to memory region structures,
+ *                        used in conjunction with @cookies bitmask mainly for
+ *                        providing a mechansim to have the same value for CPU &
+ *                        GPU virtual address.
+ * @event_queue:          Wait queue used for blocking the thread, which consumes
+ *                        the base_jd_event corresponding to an atom, when there
+ *                        are no more posted events.
+ * @tgid:                 thread group id of the process, whose thread opened the
+ *                        device file /dev/malixx instance to create a context.
+ * @pid:                  id of the thread, corresponding to process @tgid, which
+ *                        actually which opened the device file.
+ * @jctx:                 object encapsulating all the Job dispatcher related state,
+ *                        including the array of atoms.
+ * @used_pages:           Keeps a track of the number of 4KB physical pages in use
+ *                        for the context.
+ * @nonmapped_pages:      Updated in the same way as @used_pages, except for the case
+ *                        when special tracking page is freed by userspace where it
+ *                        is reset to 0.
+ * @permanent_mapped_pages: Usage count of permanently mapped memory
+ * @mem_pool:             Object containing the state for the context specific pool of
+ *                        4KB size physical pages.
+ * @lp_mem_pool:          Object containing the state for the context specific pool of
+ *                        2MB size physical pages.
+ * @reclaim:              Shrinker object registered with the kernel containing
+ *                        the pointer to callback function which is invoked under
+ *                        low memory conditions. In the callback function Driver
+ *                        frees up the memory for allocations marked as
+ *                        evictable/reclaimable.
+ * @evict_list:           List head for the list containing the allocations which
+ *                        can be evicted or freed up in the shrinker callback.
+ * @waiting_soft_jobs:    List head for the list containing softjob atoms, which
+ *                        are either waiting for the event set operation, or waiting
+ *                        for the signaling of input fence or waiting for the GPU
+ *                        device to powered on so as to dump the CPU/GPU timestamps.
+ * @waiting_soft_jobs_lock: Lock to protect @waiting_soft_jobs list from concurrent
+ *                        accesses.
+ * @dma_fence:            Object containing list head for the list of dma-buf fence
+ *                        waiting atoms and the waitqueue to process the work item
+ *                        queued for the atoms blocked on the signaling of dma-buf
+ *                        fences.
+ * @as_nr:                id of the address space being used for the scheduled in
+ *                        context. This is effectively part of the Run Pool, because
+ *                        it only has a valid setting (!=KBASEP_AS_NR_INVALID) whilst
+ *                        the context is scheduled in. The hwaccess_lock must be held
+ *                        whilst accessing this.
+ *                        If the context relating to this value of as_nr is required,
+ *                        then the context must be retained to ensure that it doesn't
+ *                        disappear whilst it is being used. Alternatively, hwaccess_lock
+ *                        can be held to ensure the context doesn't disappear (but this
+ *                        has restrictions on what other locks can be taken simutaneously).
+ * @refcount:             Keeps track of the number of users of this context. A user
+ *                        can be a job that is available for execution, instrumentation
+ *                        needing to 'pin' a context for counter collection, etc.
+ *                        If the refcount reaches 0 then this context is considered
+ *                        inactive and the previously programmed AS might be cleared
+ *                        at any point.
+ *                        Generally the reference count is incremented when the context
+ *                        is scheduled in and an atom is pulled from the context's per
+ *                        slot runnable tree.
+ * @mm_update_lock:       lock used for handling of special tracking page.
+ * @process_mm:           Pointer to the memory descriptor of the process which
+ *                        created the context. Used for accounting the physical
+ *                        pages used for GPU allocations, done for the context,
+ *                        to the memory consumed by the process.
+ * @same_va_end:          End address of the SAME_VA zone (in 4KB page units)
+ * @timeline:             Object tracking the number of atoms currently in flight for
+ *                        the context and thread group id of the process, i.e. @tgid.
+ * @mem_profile_data:     Buffer containing the profiling information provided by
+ *                        Userspace, can be read through the mem_profile debugfs file.
+ * @mem_profile_size:     Size of the @mem_profile_data.
+ * @mem_profile_lock:     Lock to serialize the operations related to mem_profile
+ *                        debugfs file.
+ * @kctx_dentry:          Pointer to the debugfs directory created for every context,
+ *                        inside kbase_device::debugfs_ctx_directory, containing
+ *                        context specific files.
+ * @reg_dump:             Buffer containing a register offset & value pair, used
+ *                        for dumping job fault debug info.
+ * @job_fault_count:      Indicates that a job fault occurred for the context and
+ *                        dumping of its debug info is in progress.
+ * @job_fault_resume_event_list: List containing atoms completed after the faulty
+ *                        atom but before the debug data for faulty atom was dumped.
+ * @jsctx_queue:          Per slot & priority arrays of object containing the root
+ *                        of RB-tree holding currently runnable atoms on the job slot
+ *                        and the head item of the linked list of atoms blocked on
+ *                        cross-slot dependencies.
+ * @atoms_pulled:         Total number of atoms currently pulled from the context.
+ * @atoms_pulled_slot:    Per slot count of the number of atoms currently pulled
+ *                        from the context.
+ * @atoms_pulled_slot_pri: Per slot & priority count of the number of atoms currently
+ *                        pulled from the context. hwaccess_lock shall be held when
+ *                        accessing it.
+ * @blocked_js:           Indicates if the context is blocked from submitting atoms
+ *                        on a slot at a given priority. This is set to true, when
+ *                        the atom corresponding to context is soft/hard stopped or
+ *                        removed from the HEAD_NEXT register in response to
+ *                        soft/hard stop.
+ * @slots_pullable:       Bitmask of slots, indicating the slots for which the
+ *                        context has pullable atoms in the runnable tree.
+ * @work:                 Work structure used for deferred ASID assignment.
+ * @vinstr_cli:           Pointer to the legacy userspace vinstr client, there can
+ *                        be only such client per kbase context.
+ * @vinstr_cli_lock:      Lock used for the vinstr ioctl calls made for @vinstr_cli.
+ * @completed_jobs:       List containing completed atoms for which base_jd_event is
+ *                        to be posted.
+ * @work_count:           Number of work items, corresponding to atoms, currently
+ *                        pending on job_done workqueue of @jctx.
+ * @soft_job_timeout:     Timer object used for failing/cancelling the waiting
+ *                        soft-jobs which have been blocked for more than the
+ *                        timeout value used for the soft-jobs
+ * @jit_alloc:            Array of 256 pointers to GPU memory regions, used for
+ *                        for JIT allocations.
+ * @jit_max_allocations:  Maximum number of JIT allocations allowed at once.
+ * @jit_current_allocations: Current number of in-flight JIT allocations.
+ * @jit_current_allocations_per_bin: Current number of in-flight JIT allocations per bin
+ * @jit_version:          version number indicating whether userspace is using
+ *                        old or new version of interface for JIT allocations
+ *	                  1 -> client used KBASE_IOCTL_MEM_JIT_INIT_OLD
+ *	                  2 -> client used KBASE_IOCTL_MEM_JIT_INIT
+ * @jit_active_head:      List containing the JIT allocations which are in use.
+ * @jit_pool_head:        List containing the JIT allocations which have been
+ *                        freed up by userpsace and so not being used by them.
+ *                        Driver caches them to quickly fulfill requests for new
+ *                        JIT allocations. They are released in case of memory
+ *                        pressure as they are put on the @evict_list when they
+ *                        are freed up by userspace.
+ * @jit_destroy_head:     List containing the JIT allocations which were moved to it
+ *                        from @jit_pool_head, in the shrinker callback, after freeing
+ *                        their backing physical pages.
+ * @jit_evict_lock:       Lock used for operations done on JIT allocations and also
+ *                        for accessing @evict_list.
+ * @jit_work:             Work item queued to defer the freeing of memory region when
+ *                        JIT allocation is moved to @jit_destroy_head.
+ * @jit_atoms_head:       A list of the JIT soft-jobs, both alloc & free, in submission
+ *                        order, protected by kbase_jd_context.lock.
+ * @jit_pending_alloc:    A list of JIT alloc soft-jobs for which allocation will be
+ *                        reattempted after the impending free of other active JIT
+ *                        allocations.
+ * @ext_res_meta_head:    A list of sticky external resources which were requested to
+ *                        be mapped on GPU side, through a softjob atom of type
+ *                        EXT_RES_MAP or STICKY_RESOURCE_MAP ioctl.
+ * @drain_pending:        Used to record that a flush/invalidate of the GPU caches was
+ *                        requested from atomic context, so that the next flush request
+ *                        can wait for the flush of GPU writes.
+ * @age_count:            Counter incremented on every call to jd_submit_atom,
+ *                        atom is assigned the snapshot of this counter, which
+ *                        is used to determine the atom's age when it is added to
+ *                        the runnable RB-tree.
+ * @trim_level:           Level of JIT allocation trimming to perform on free (0-100%)
+ * @gwt_enabled:          Indicates if tracking of GPU writes is enabled, protected by
+ *                        kbase_context.reg_lock.
+ * @gwt_was_enabled:      Simple sticky bit flag to know if GWT was ever enabled.
+ * @gwt_current_list:     A list of addresses for which GPU has generated write faults,
+ *                        after the last snapshot of it was sent to userspace.
+ * @gwt_snapshot_list:    Snapshot of the @gwt_current_list for sending to user space.
+ * @priority:             Indicates the context priority. Used along with @atoms_count
+ *                        for context scheduling, protected by hwaccess_lock.
+ * @atoms_count:          Number of gpu atoms currently in use, per priority
+ */
+struct kbase_context {
+	struct file *filp;
+	struct kbase_device *kbdev;
+	struct kbase_mmu_table mmu;
+
+	u32 id;
+	unsigned long api_version;
+	struct list_head event_list;
+	struct list_head event_coalesce_list;
+	struct mutex event_mutex;
+	atomic_t event_closed;
+	struct workqueue_struct *event_workq;
+	atomic_t event_count;
+	int event_coalesce_count;
+
+	atomic_t flags;
+
+	atomic_t                setup_complete;
+	atomic_t                setup_in_progress;
+
+	struct tagged_addr aliasing_sink_page;
+
+	spinlock_t              mem_partials_lock;
+	struct list_head        mem_partials;
+
+	struct mutex            reg_lock;
+	struct rb_root reg_rbtree_same;
+	struct rb_root reg_rbtree_custom;
+
+
+	unsigned long    cookies;
+	struct kbase_va_region *pending_regions[BITS_PER_LONG];
+
+	wait_queue_head_t event_queue;
+	pid_t tgid;
+	pid_t pid;
+
+	struct kbase_jd_context jctx;
+	atomic_t used_pages;
+	atomic_t         nonmapped_pages;
+	unsigned long permanent_mapped_pages;
+
+	struct kbase_mem_pool mem_pool;
+	struct kbase_mem_pool lp_mem_pool;
+
+	struct shrinker         reclaim;
+	struct list_head        evict_list;
+
+	struct list_head waiting_soft_jobs;
+	spinlock_t waiting_soft_jobs_lock;
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct {
+		struct list_head waiting_resource;
+		struct workqueue_struct *wq;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	int as_nr;
+
+	atomic_t refcount;
+
+
+	/* NOTE:
+	 *
+	 * Flags are in jctx.sched_info.ctx.flags
+	 * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex
+	 *
+	 * All other flags must be added there */
+	spinlock_t         mm_update_lock;
+	struct mm_struct __rcu *process_mm;
+	u64 same_va_end;
+
+#ifdef CONFIG_DEBUG_FS
+	char *mem_profile_data;
+	size_t mem_profile_size;
+	struct mutex mem_profile_lock;
+	struct dentry *kctx_dentry;
+
+	unsigned int *reg_dump;
+	atomic_t job_fault_count;
+	struct list_head job_fault_resume_event_list;
+
+#endif /* CONFIG_DEBUG_FS */
+
+	struct jsctx_queue jsctx_queue
+		[KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
+
+	atomic_t atoms_pulled;
+	atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
+	int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][
+			KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+	bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+	u32 slots_pullable;
+
+	struct work_struct work;
+
+	struct kbase_vinstr_client *vinstr_cli;
+	struct mutex vinstr_cli_lock;
+
+	struct list_head completed_jobs;
+	atomic_t work_count;
+
+	struct timer_list soft_job_timeout;
+
+	struct kbase_va_region *jit_alloc[256];
+	u8 jit_max_allocations;
+	u8 jit_current_allocations;
+	u8 jit_current_allocations_per_bin[256];
+	u8 jit_version;
+	struct list_head jit_active_head;
+	struct list_head jit_pool_head;
+	struct list_head jit_destroy_head;
+	struct mutex jit_evict_lock;
+	struct work_struct jit_work;
+
+	struct list_head jit_atoms_head;
+	struct list_head jit_pending_alloc;
+
+	struct list_head ext_res_meta_head;
+
+	atomic_t drain_pending;
+
+	u32 age_count;
+
+	u8 trim_level;
+
+#ifdef CONFIG_MALI_JOB_DUMP
+	bool gwt_enabled;
+
+	bool gwt_was_enabled;
+
+	struct list_head gwt_current_list;
+
+	struct list_head gwt_snapshot_list;
+#endif
+
+	int priority;
+	s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+};
+
+#ifdef CONFIG_MALI_JOB_DUMP
+/**
+ * struct kbasep_gwt_list_element - Structure used to collect GPU
+ *                                  write faults.
+ * @link:                           List head for adding write faults.
+ * @region:                         Details of the region where we have the
+ *                                  faulting page address.
+ * @page_addr:                      Page address where GPU write fault occurred.
+ * @num_pages:                      The number of pages modified.
+ *
+ * Using this structure all GPU write faults are stored in a list.
+ */
+struct kbasep_gwt_list_element {
+	struct list_head link;
+	struct kbase_va_region *region;
+	u64 page_addr;
+	u64 num_pages;
+};
+
+#endif
+
+/**
+ * struct kbase_ctx_ext_res_meta - Structure which binds an external resource
+ *                                 to a @kbase_context.
+ * @ext_res_node:                  List head for adding the metadata to a
+ *                                 @kbase_context.
+ * @alloc:                         The physical memory allocation structure
+ *                                 which is mapped.
+ * @gpu_addr:                      The GPU virtual address the resource is
+ *                                 mapped to.
+ *
+ * External resources can be mapped into multiple contexts as well as the same
+ * context multiple times.
+ * As kbase_va_region itself isn't refcounted we can't attach our extra
+ * information to it as it could be removed under our feet leaving external
+ * resources pinned.
+ * This metadata structure binds a single external resource to a single
+ * context, ensuring that per context mapping is tracked separately so it can
+ * be overridden when needed and abuses by the application (freeing the resource
+ * multiple times) don't effect the refcount of the physical allocation.
+ */
+struct kbase_ctx_ext_res_meta {
+	struct list_head ext_res_node;
+	struct kbase_mem_phy_alloc *alloc;
+	u64 gpu_addr;
+};
+
+enum kbase_reg_access_type {
+	REG_READ,
+	REG_WRITE
+};
+
+enum kbase_share_attr_bits {
+	/* (1ULL << 8) bit is reserved */
+	SHARE_BOTH_BITS = (2ULL << 8),	/* inner and outer shareable coherency */
+	SHARE_INNER_BITS = (3ULL << 8)	/* inner shareable coherency */
+};
+
+/**
+ * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
+ * @kbdev: kbase device
+ *
+ * Return: true if the device access are coherent, false if not.
+ */
+static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
+{
+	if ((kbdev->system_coherency == COHERENCY_ACE_LITE) ||
+			(kbdev->system_coherency == COHERENCY_ACE))
+		return true;
+
+	return false;
+}
+
+/* Conversion helpers for setting up high resolution timers */
+#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U))
+#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
+
+/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
+#define KBASE_CLEAN_CACHE_MAX_LOOPS     100000
+/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
+#define KBASE_AS_INACTIVE_MAX_LOOPS     100000
+
+/* Maximum number of times a job can be replayed */
+#define BASEP_JD_REPLAY_LIMIT 15
+
+/* JobDescriptorHeader - taken from the architecture specifications, the layout
+ * is currently identical for all GPU archs. */
+struct job_descriptor_header {
+	u32 exception_status;
+	u32 first_incomplete_task;
+	u64 fault_pointer;
+	u8 job_descriptor_size : 1;
+	u8 job_type : 7;
+	u8 job_barrier : 1;
+	u8 _reserved_01 : 1;
+	u8 _reserved_1 : 1;
+	u8 _reserved_02 : 1;
+	u8 _reserved_03 : 1;
+	u8 _reserved_2 : 1;
+	u8 _reserved_04 : 1;
+	u8 _reserved_05 : 1;
+	u16 job_index;
+	u16 job_dependency_index_1;
+	u16 job_dependency_index_2;
+	union {
+		u64 _64;
+		u32 _32;
+	} next_job;
+};
+
+#endif				/* _KBASE_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c
new file mode 100644
index 0000000000000..804cf3feab236
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_device.c
@@ -0,0 +1,581 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base kernel device APIs
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <mali_kbase_profiling_gator_api.h>
+
+/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
+ * Supports tracing feature provided in the base module.
+ * Please keep it in sync with the value of base module.
+ */
+#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
+
+#if KBASE_TRACE_ENABLE
+static const char *kbasep_trace_code_string[] = {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ARRAY */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) # X
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+};
+#endif
+
+#define DEBUG_MESSAGE_SIZE 256
+
+static int kbasep_trace_init(struct kbase_device *kbdev);
+static void kbasep_trace_term(struct kbase_device *kbdev);
+static void kbasep_trace_hook_wrapper(void *param);
+
+struct kbase_device *kbase_device_alloc(void)
+{
+	return kzalloc(sizeof(struct kbase_device), GFP_KERNEL);
+}
+
+static int kbase_device_as_init(struct kbase_device *kbdev, int i)
+{
+	const char format[] = "mali_mmu%d";
+	char name[sizeof(format)];
+	const char poke_format[] = "mali_mmu%d_poker";
+	char poke_name[sizeof(poke_format)];
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		snprintf(poke_name, sizeof(poke_name), poke_format, i);
+
+	snprintf(name, sizeof(name), format, i);
+
+	kbdev->as[i].number = i;
+	kbdev->as[i].fault_addr = 0ULL;
+
+	kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1);
+	if (!kbdev->as[i].pf_wq)
+		return -EINVAL;
+
+	INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
+	INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+		struct hrtimer *poke_timer = &kbdev->as[i].poke_timer;
+		struct work_struct *poke_work = &kbdev->as[i].poke_work;
+
+		kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1);
+		if (!kbdev->as[i].poke_wq) {
+			destroy_workqueue(kbdev->as[i].pf_wq);
+			return -EINVAL;
+		}
+		INIT_WORK(poke_work, kbasep_as_do_poke);
+
+		hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+		poke_timer->function = kbasep_as_poke_timer_callback;
+
+		kbdev->as[i].poke_refcount = 0;
+		kbdev->as[i].poke_state = 0u;
+	}
+
+	return 0;
+}
+
+static void kbase_device_as_term(struct kbase_device *kbdev, int i)
+{
+	destroy_workqueue(kbdev->as[i].pf_wq);
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		destroy_workqueue(kbdev->as[i].poke_wq);
+}
+
+static int kbase_device_all_as_init(struct kbase_device *kbdev)
+{
+	int i, err;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		err = kbase_device_as_init(kbdev, i);
+		if (err)
+			goto free_workqs;
+	}
+
+	return 0;
+
+free_workqs:
+	for (; i > 0; i--)
+		kbase_device_as_term(kbdev, i);
+
+	return err;
+}
+
+static void kbase_device_all_as_term(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++)
+		kbase_device_as_term(kbdev, i);
+}
+
+int kbase_device_init(struct kbase_device * const kbdev)
+{
+	int i, err;
+#ifdef CONFIG_ARM64
+	struct device_node *np = NULL;
+#endif /* CONFIG_ARM64 */
+
+	spin_lock_init(&kbdev->mmu_mask_change);
+	mutex_init(&kbdev->mmu_hw_mutex);
+#ifdef CONFIG_ARM64
+	kbdev->cci_snoop_enabled = false;
+	np = kbdev->dev->of_node;
+	if (np != NULL) {
+		if (of_property_read_u32(np, "snoop_enable_smc",
+					&kbdev->snoop_enable_smc))
+			kbdev->snoop_enable_smc = 0;
+		if (of_property_read_u32(np, "snoop_disable_smc",
+					&kbdev->snoop_disable_smc))
+			kbdev->snoop_disable_smc = 0;
+		/* Either both or none of the calls should be provided. */
+		if (!((kbdev->snoop_disable_smc == 0
+			&& kbdev->snoop_enable_smc == 0)
+			|| (kbdev->snoop_disable_smc != 0
+			&& kbdev->snoop_enable_smc != 0))) {
+			WARN_ON(1);
+			err = -EINVAL;
+			goto fail;
+		}
+	}
+#endif /* CONFIG_ARM64 */
+	/* Get the list of workarounds for issues on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	err = kbase_hw_set_issues_mask(kbdev);
+	if (err)
+		goto fail;
+
+	/* Set the list of features available on the current HW
+	 * (identified by the GPU_ID register)
+	 */
+	kbase_hw_set_features_mask(kbdev);
+
+	kbase_gpuprops_set_features(kbdev);
+
+	/* On Linux 4.0+, dma coherency is determined from device tree */
+#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
+	set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
+#endif
+
+	/* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our
+	 * device structure was created by device-tree
+	 */
+	if (!kbdev->dev->dma_mask)
+		kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask;
+
+	err = dma_set_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	err = dma_set_coherent_mask(kbdev->dev,
+			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+	if (err)
+		goto dma_set_mask_failed;
+
+	kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
+
+	err = kbase_device_all_as_init(kbdev);
+	if (err)
+		goto as_init_failed;
+
+	spin_lock_init(&kbdev->hwcnt.lock);
+
+	err = kbasep_trace_init(kbdev);
+	if (err)
+		goto term_as;
+
+	mutex_init(&kbdev->cacheclean_lock);
+
+	/* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
+	for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
+		kbdev->kbase_profiling_controls[i] = 0;
+
+	kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
+
+	atomic_set(&kbdev->ctx_num, 0);
+
+	err = kbase_instr_backend_init(kbdev);
+	if (err)
+		goto term_trace;
+
+	kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
+
+	kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
+
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+		kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
+	else
+		kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+
+	return 0;
+term_trace:
+	kbasep_trace_term(kbdev);
+term_as:
+	kbase_device_all_as_term(kbdev);
+as_init_failed:
+dma_set_mask_failed:
+fail:
+	return err;
+}
+
+void kbase_device_term(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+#if KBASE_TRACE_ENABLE
+	kbase_debug_assert_register_hook(NULL, NULL);
+#endif
+
+	kbase_instr_backend_term(kbdev);
+
+	kbasep_trace_term(kbdev);
+
+	kbase_device_all_as_term(kbdev);
+}
+
+void kbase_device_free(struct kbase_device *kbdev)
+{
+	kfree(kbdev);
+}
+
+/*
+ * Device trace functions
+ */
+#if KBASE_TRACE_ENABLE
+
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	struct kbase_trace *rbuf;
+
+	rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
+
+	if (!rbuf)
+		return -EINVAL;
+
+	kbdev->trace_rbuf = rbuf;
+	spin_lock_init(&kbdev->trace_lock);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	kfree(kbdev->trace_rbuf);
+}
+
+static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len)
+{
+	s32 written = 0;
+
+	/* Initial part of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0);
+
+	if (trace_msg->katom)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0);
+
+	/* NOTE: Could add function callbacks to handle different message types */
+	/* Jobslot present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Refcount present */
+	if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT)
+		written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0);
+
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+	/* Rest of message */
+	written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0);
+}
+
+static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg)
+{
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	dev_dbg(kbdev->dev, "%s", buffer);
+}
+
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val)
+{
+	unsigned long irqflags;
+	struct kbase_trace *trace_msg;
+
+	spin_lock_irqsave(&kbdev->trace_lock, irqflags);
+
+	trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in];
+
+	/* Fill the message */
+	trace_msg->thread_id = task_pid_nr(current);
+	trace_msg->cpu = task_cpu(current);
+
+	getnstimeofday(&trace_msg->timestamp);
+
+	trace_msg->code = code;
+	trace_msg->ctx = ctx;
+
+	if (NULL == katom) {
+		trace_msg->katom = false;
+	} else {
+		trace_msg->katom = true;
+		trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom);
+		trace_msg->atom_udata[0] = katom->udata.blob[0];
+		trace_msg->atom_udata[1] = katom->udata.blob[1];
+	}
+
+	trace_msg->gpu_addr = gpu_addr;
+	trace_msg->jobslot = jobslot;
+	trace_msg->refcount = MIN((unsigned int)refcount, 0xFF);
+	trace_msg->info_val = info_val;
+	trace_msg->flags = flags;
+
+	/* Update the ringbuffer indices */
+	kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK;
+	if (kbdev->trace_next_in == kbdev->trace_first_out)
+		kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK;
+
+	/* Done */
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, irqflags);
+}
+
+void kbasep_trace_clear(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	kbdev->trace_first_out = kbdev->trace_next_in;
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	u32 start;
+	u32 end;
+
+	dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val");
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	start = kbdev->trace_first_out;
+	end = kbdev->trace_next_in;
+
+	while (start != end) {
+		struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start];
+
+		kbasep_trace_dump_msg(kbdev, trace_msg);
+
+		start = (start + 1) & KBASE_TRACE_MASK;
+	}
+	dev_dbg(kbdev->dev, "TRACE_END");
+
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	KBASE_TRACE_CLEAR(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	struct kbase_device *kbdev = (struct kbase_device *)param;
+
+	kbasep_trace_dump(kbdev);
+}
+
+#ifdef CONFIG_DEBUG_FS
+struct trace_seq_state {
+	struct kbase_trace trace_buf[KBASE_TRACE_SIZE];
+	u32 start;
+	u32 end;
+};
+
+static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	if (*pos > KBASE_TRACE_SIZE)
+		return NULL;
+	i = state->start + *pos;
+	if ((state->end >= state->start && i >= state->end) ||
+			i >= state->end + KBASE_TRACE_SIZE)
+		return NULL;
+
+	i &= KBASE_TRACE_MASK;
+
+	return &state->trace_buf[i];
+}
+
+static void kbasep_trace_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+	struct trace_seq_state *state = s->private;
+	int i;
+
+	(*pos)++;
+
+	i = (state->start + *pos) & KBASE_TRACE_MASK;
+	if (i == state->end)
+		return NULL;
+
+	return &state->trace_buf[i];
+}
+
+static int kbasep_trace_seq_show(struct seq_file *s, void *data)
+{
+	struct kbase_trace *trace_msg = data;
+	char buffer[DEBUG_MESSAGE_SIZE];
+
+	kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+	seq_printf(s, "%s\n", buffer);
+	return 0;
+}
+
+static const struct seq_operations kbasep_trace_seq_ops = {
+	.start = kbasep_trace_seq_start,
+	.next = kbasep_trace_seq_next,
+	.stop = kbasep_trace_seq_stop,
+	.show = kbasep_trace_seq_show,
+};
+
+static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file)
+{
+	struct kbase_device *kbdev = inode->i_private;
+	unsigned long flags;
+
+	struct trace_seq_state *state;
+
+	state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state));
+	if (!state)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&kbdev->trace_lock, flags);
+	state->start = kbdev->trace_first_out;
+	state->end = kbdev->trace_next_in;
+	memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf));
+	spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+	return 0;
+}
+
+static const struct file_operations kbasep_trace_debugfs_fops = {
+	.open = kbasep_trace_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
+
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("mali_trace", S_IRUGO,
+			kbdev->mali_debugfs_directory, kbdev,
+			&kbasep_trace_debugfs_fops);
+}
+
+#else
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+}
+#endif				/* CONFIG_DEBUG_FS */
+
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+	return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+	CSTD_UNUSED(param);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value)
+{
+	switch (control) {
+	case FBDUMP_CONTROL_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RATE:
+		/* fall through */
+	case SW_COUNTER_ENABLE:
+		/* fall through */
+	case FBDUMP_CONTROL_RESIZE_FACTOR:
+		kbdev->kbase_profiling_controls[control] = value;
+		break;
+	default:
+		dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+		break;
+	}
+}
+
+/*
+ * Called by gator to control the production of
+ * profiling information at runtime
+ * */
+
+void _mali_profiling_control(u32 action, u32 value)
+{
+	struct kbase_device *kbdev = NULL;
+
+	/* find the first i.e. call with -1 */
+	kbdev = kbase_find_device(-1);
+
+	if (NULL != kbdev)
+		kbase_set_profiling_control(kbdev, action, value);
+}
+KBASE_EXPORT_SYMBOL(_mali_profiling_control);
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
new file mode 100644
index 0000000000000..68eb4ed0715dd
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
@@ -0,0 +1,81 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Base kernel disjoint events helper functions
+ */
+
+#include <mali_kbase.h>
+
+void kbase_disjoint_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_set(&kbdev->disjoint_event.count, 0);
+	atomic_set(&kbdev->disjoint_event.state, 0);
+}
+
+/* increment the disjoint event count */
+void kbase_disjoint_event(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.count);
+}
+
+/* increment the state and the event counter */
+void kbase_disjoint_state_up(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	atomic_inc(&kbdev->disjoint_event.state);
+
+	kbase_disjoint_event(kbdev);
+}
+
+/* decrement the state */
+void kbase_disjoint_state_down(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0);
+
+	kbase_disjoint_event(kbdev);
+
+	atomic_dec(&kbdev->disjoint_event.state);
+}
+
+/* increments the count only if the state is > 0 */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	if (atomic_read(&kbdev->disjoint_event.state))
+		kbase_disjoint_event(kbdev);
+}
+
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return atomic_read(&kbdev->disjoint_event.count);
+}
+KBASE_EXPORT_TEST_API(kbase_disjoint_event_get);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
new file mode 100644
index 0000000000000..9dddde4fe7dec
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
@@ -0,0 +1,454 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as
+ * it will be set there.
+ */
+#include "mali_kbase_dma_fence.h"
+
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
+#include <linux/dma-resv.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/ww_mutex.h>
+
+#include <mali_kbase.h>
+
+static void
+kbase_dma_fence_work(struct work_struct *pwork);
+
+static void
+kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	list_add_tail(&katom->queue, &kctx->dma_fence.waiting_resource);
+}
+
+static void
+kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom)
+{
+	list_del(&katom->queue);
+}
+
+static int
+kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info,
+				  struct ww_acquire_ctx *ctx)
+{
+	struct dma_resv *content_res = NULL;
+	unsigned int content_res_idx = 0;
+	unsigned int r;
+	int err = 0;
+
+	ww_acquire_init(ctx, &reservation_ww_class);
+
+retry:
+	for (r = 0; r < info->dma_fence_resv_count; r++) {
+		if (info->resv_objs[r] == content_res) {
+			content_res = NULL;
+			continue;
+		}
+
+		err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx);
+		if (err)
+			goto error;
+	}
+
+	ww_acquire_done(ctx);
+	return err;
+
+error:
+	content_res_idx = r;
+
+	/* Unlock the locked one ones */
+	while (r--)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+
+	if (content_res)
+		ww_mutex_unlock(&content_res->lock);
+
+	/* If we deadlock try with lock_slow and retry */
+	if (err == -EDEADLK) {
+		content_res = info->resv_objs[content_res_idx];
+		ww_mutex_lock_slow(&content_res->lock, ctx);
+		goto retry;
+	}
+
+	/* If we are here the function failed */
+	ww_acquire_fini(ctx);
+	return err;
+}
+
+static void
+kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info,
+				    struct ww_acquire_ctx *ctx)
+{
+	unsigned int r;
+
+	for (r = 0; r < info->dma_fence_resv_count; r++)
+		ww_mutex_unlock(&info->resv_objs[r]->lock);
+	ww_acquire_fini(ctx);
+}
+
+/**
+ * kbase_dma_fence_queue_work() - Queue work to handle @katom
+ * @katom: Pointer to atom for which to queue work
+ *
+ * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and
+ * submit the atom.
+ */
+static void
+kbase_dma_fence_queue_work(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	bool ret;
+
+	INIT_WORK(&katom->work, kbase_dma_fence_work);
+	ret = queue_work(kctx->dma_fence.wq, &katom->work);
+	/* Warn if work was already queued, that should not happen. */
+	WARN_ON(!ret);
+}
+
+/**
+ * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom
+ * @katom:	Katom to cancel
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ */
+static void
+kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Cancel callbacks and clean up. */
+	kbase_fence_free_callbacks(katom);
+
+	/* Mark the atom as handled in case all fences signaled just before
+	 * canceling the callbacks and the worker was queued.
+	 */
+	kbase_fence_dep_count_set(katom, -1);
+
+	/* Prevent job_done_nolock from being called twice on an atom when
+	 * there is a race between job completion and cancellation.
+	 */
+
+	if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+		/* Wait was cancelled - zap the atom */
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		if (jd_done_nolock(katom, NULL))
+			kbase_js_sched_all(katom->kctx->kbdev);
+	}
+}
+
+/**
+ * kbase_dma_fence_work() - Worker thread called when a fence is signaled
+ * @pwork:	work_struct containing a pointer to a katom
+ *
+ * This function will clean and mark all dependencies as satisfied
+ */
+static void
+kbase_dma_fence_work(struct work_struct *pwork)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_jd_context *ctx;
+
+	katom = container_of(pwork, struct kbase_jd_atom, work);
+	ctx = &katom->kctx->jctx;
+
+	mutex_lock(&ctx->lock);
+	if (kbase_fence_dep_count_read(katom) != 0)
+		goto out;
+
+	kbase_fence_dep_count_set(katom, -1);
+
+	/* Remove atom from list of dma-fence waiting atoms. */
+	kbase_dma_fence_waiters_remove(katom);
+	/* Cleanup callbacks. */
+	kbase_fence_free_callbacks(katom);
+	/*
+	 * Queue atom on GPU, unless it has already completed due to a failing
+	 * dependency. Run jd_done_nolock() on the katom if it is completed.
+	 */
+	if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
+		jd_done_nolock(katom, NULL);
+	else
+		kbase_jd_dep_clear_locked(katom);
+
+out:
+	mutex_unlock(&ctx->lock);
+}
+
+static void
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb)
+#else
+kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+#endif
+{
+	struct kbase_fence_cb *kcb = container_of(cb,
+				struct kbase_fence_cb,
+				fence_cb);
+	struct kbase_jd_atom *katom = kcb->katom;
+
+	/* If the atom is zapped dep_count will be forced to a negative number
+	 * preventing this callback from ever scheduling work. Which in turn
+	 * would reschedule the atom.
+	 */
+
+	if (kbase_fence_dep_count_dec_and_test(katom))
+		kbase_dma_fence_queue_work(katom);
+}
+
+static int
+kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom,
+					 struct dma_resv *resv,
+					 bool exclusive)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *excl_fence = NULL;
+	struct fence **shared_fences = NULL;
+#else
+	struct dma_fence *excl_fence = NULL;
+	struct dma_fence **shared_fences = NULL;
+#endif
+	unsigned int shared_count = 0;
+	int err, i;
+
+	err = dma_resv_get_fences_rcu(resv,
+						&excl_fence,
+						&shared_count,
+						&shared_fences);
+	if (err)
+		return err;
+
+	if (excl_fence) {
+		err = kbase_fence_add_callback(katom,
+						excl_fence,
+						kbase_dma_fence_cb);
+
+		/* Release our reference, taken by reservation_object_get_fences_rcu(),
+		 * to the fence. We have set up our callback (if that was possible),
+		 * and it's the fence's owner is responsible for singling the fence
+		 * before allowing it to disappear.
+		 */
+		dma_fence_put(excl_fence);
+
+		if (err)
+			goto out;
+	}
+
+	if (exclusive) {
+		for (i = 0; i < shared_count; i++) {
+			err = kbase_fence_add_callback(katom,
+							shared_fences[i],
+							kbase_dma_fence_cb);
+			if (err)
+				goto out;
+		}
+	}
+
+	/* Release all our references to the shared fences, taken by
+	 * reservation_object_get_fences_rcu(). We have set up our callback (if
+	 * that was possible), and it's the fence's owner is responsible for
+	 * signaling the fence before allowing it to disappear.
+	 */
+out:
+	for (i = 0; i < shared_count; i++)
+		dma_fence_put(shared_fences[i]);
+	kfree(shared_fences);
+
+	if (err) {
+		/*
+		 * On error, cancel and clean up all callbacks that was set up
+		 * before the error.
+		 */
+		kbase_fence_free_callbacks(katom);
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_add_reservation(struct dma_resv *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive)
+{
+	unsigned int i;
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		/* Duplicate resource, ignore */
+		if (info->resv_objs[i] == resv)
+			return;
+	}
+
+	info->resv_objs[info->dma_fence_resv_count] = resv;
+	if (exclusive)
+		set_bit(info->dma_fence_resv_count,
+			info->dma_fence_excl_bitmap);
+	(info->dma_fence_resv_count)++;
+}
+
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info)
+{
+	int err, i;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+	struct ww_acquire_ctx ww_ctx;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	fence = kbase_fence_out_new(katom);
+	if (!fence) {
+		err = -ENOMEM;
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d creating fence.\n", err);
+		return err;
+	}
+
+	kbase_fence_dep_count_set(katom, 1);
+
+	err = kbase_dma_fence_lock_reservations(info, &ww_ctx);
+	if (err) {
+		dev_err(katom->kctx->kbdev->dev,
+			"Error %d locking reservations.\n", err);
+		kbase_fence_dep_count_set(katom, -1);
+		kbase_fence_out_remove(katom);
+		return err;
+	}
+
+	for (i = 0; i < info->dma_fence_resv_count; i++) {
+		struct dma_resv *obj = info->resv_objs[i];
+
+		if (!test_bit(i, info->dma_fence_excl_bitmap)) {
+			err = dma_resv_reserve_shared(obj, 1);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d reserving space for shared fence.\n", err);
+				goto end;
+			}
+
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, false);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			dma_resv_add_shared_fence(obj, fence);
+		} else {
+			err = kbase_dma_fence_add_reservation_callback(katom, obj, true);
+			if (err) {
+				dev_err(katom->kctx->kbdev->dev,
+					"Error %d adding reservation to callback.\n", err);
+				goto end;
+			}
+
+			dma_resv_add_excl_fence(obj, fence);
+		}
+	}
+
+end:
+	kbase_dma_fence_unlock_reservations(info, &ww_ctx);
+
+	if (likely(!err)) {
+		/* Test if the callbacks are already triggered */
+		if (kbase_fence_dep_count_dec_and_test(katom)) {
+			kbase_fence_dep_count_set(katom, -1);
+			kbase_fence_free_callbacks(katom);
+		} else {
+			/* Add katom to the list of dma-buf fence waiting atoms
+			 * only if it is still waiting.
+			 */
+			kbase_dma_fence_waiters_add(katom);
+		}
+	} else {
+		/* There was an error, cancel callbacks, set dep_count to -1 to
+		 * indicate that the atom has been handled (the caller will
+		 * kill it for us), signal the fence, free callbacks and the
+		 * fence.
+		 */
+		kbase_fence_free_callbacks(katom);
+		kbase_fence_dep_count_set(katom, -1);
+		kbase_dma_fence_signal(katom);
+	}
+
+	return err;
+}
+
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx)
+{
+	struct list_head *list = &kctx->dma_fence.waiting_resource;
+
+	while (!list_empty(list)) {
+		struct kbase_jd_atom *katom;
+
+		katom = list_first_entry(list, struct kbase_jd_atom, queue);
+		kbase_dma_fence_waiters_remove(katom);
+		kbase_dma_fence_cancel_atom(katom);
+	}
+}
+
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom)
+{
+	/* Cancel callbacks and clean up. */
+	if (kbase_fence_free_callbacks(katom))
+		kbase_dma_fence_queue_work(katom);
+}
+
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
+{
+	if (!katom->dma_fence.fence)
+		return;
+
+	/* Signal the atom's fence. */
+	dma_fence_signal(katom->dma_fence.fence);
+
+	kbase_fence_out_remove(katom);
+
+	kbase_fence_free_callbacks(katom);
+}
+
+void kbase_dma_fence_term(struct kbase_context *kctx)
+{
+	destroy_workqueue(kctx->dma_fence.wq);
+	kctx->dma_fence.wq = NULL;
+}
+
+int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource);
+
+	kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d",
+					     WQ_UNBOUND, 1, kctx->pid);
+	if (!kctx->dma_fence.wq)
+		return -ENOMEM;
+
+	return 0;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
new file mode 100644
index 0000000000000..fd02d15f8421f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
@@ -0,0 +1,136 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_DMA_FENCE_H_
+#define _KBASE_DMA_FENCE_H_
+
+#ifdef CONFIG_MALI_DMA_FENCE
+
+#include <linux/list.h>
+#include <linux/dma-resv.h>
+#include <mali_kbase_fence.h>
+
+
+/* Forward declaration from mali_kbase_defs.h */
+struct kbase_jd_atom;
+struct kbase_context;
+
+/**
+ * struct kbase_dma_fence_resv_info - Structure with list of reservation objects
+ * @resv_objs:             Array of reservation objects to attach the
+ *                         new fence to.
+ * @dma_fence_resv_count:  Number of reservation objects in the array.
+ * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive.
+ *
+ * This is used by some functions to pass around a collection of data about
+ * reservation objects.
+ */
+struct kbase_dma_fence_resv_info {
+	struct dma_resv **resv_objs;
+	unsigned int dma_fence_resv_count;
+	unsigned long *dma_fence_excl_bitmap;
+};
+
+/**
+ * kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs
+ * @resv:      Reservation object to add to the array.
+ * @info:      Pointer to struct with current reservation info
+ * @exclusive: Boolean indicating if exclusive access is needed
+ *
+ * The function adds a new reservation_object to an existing array of
+ * reservation_objects. At the same time keeps track of which objects require
+ * exclusive access in dma_fence_excl_bitmap.
+ */
+void kbase_dma_fence_add_reservation(struct dma_resv *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive);
+
+/**
+ * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs
+ * @katom: Katom with the external dependency.
+ * @info:  Pointer to struct with current reservation info
+ *
+ * Return: An error code or 0 if succeeds
+ */
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info);
+
+/**
+ * kbase_dma_fence_cancel_ctx() - Cancel all dma-fences blocked atoms on kctx
+ * @kctx: Pointer to kbase context
+ *
+ * This function will cancel and clean up all katoms on @kctx that is waiting
+ * on dma-buf fences.
+ *
+ * Locking: jctx.lock needs to be held when calling this function.
+ */
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom
+ * @katom: Pointer to katom whose callbacks are to be canceled
+ *
+ * This function cancels all dma-buf fence callbacks on @katom, but does not
+ * cancel the katom itself.
+ *
+ * The caller is responsible for ensuring that jd_done_nolock is called on
+ * @katom.
+ *
+ * Locking: jctx.lock must be held when calling this function.
+ */
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_signal() - Signal katom's fence and clean up after wait
+ * @katom: Pointer to katom to signal and clean up
+ *
+ * This function will signal the @katom's fence, if it has one, and clean up
+ * the callback data from the katom's wait on earlier fences.
+ *
+ * Locking: jctx.lock must be held while calling this function.
+ */
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_term() - Terminate Mali dma-fence context
+ * @kctx: kbase context to terminate
+ */
+void kbase_dma_fence_term(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_init() - Initialize Mali dma-fence context
+ * @kctx: kbase context to initialize
+ */
+int kbase_dma_fence_init(struct kbase_context *kctx);
+
+
+#else /* CONFIG_MALI_DMA_FENCE */
+/* Dummy functions for when dma-buf fence isn't enabled. */
+
+static inline int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	return 0;
+}
+
+static inline void kbase_dma_fence_term(struct kbase_context *kctx) {}
+#endif /* CONFIG_MALI_DMA_FENCE */
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c
new file mode 100644
index 0000000000000..3c9cef3641346
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_event.c
@@ -0,0 +1,262 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_debug.h>
+#include <mali_kbase_tlstream.h>
+
+static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct base_jd_udata data;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+	KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+	data = katom->udata;
+
+	KBASE_TLSTREAM_TL_NRET_ATOM_CTX(katom, kctx);
+	KBASE_TLSTREAM_TL_DEL_ATOM(katom);
+
+	katom->status = KBASE_JD_ATOM_STATE_UNUSED;
+
+	wake_up(&katom->completed);
+
+	return data;
+}
+
+int kbase_event_pending(struct kbase_context *ctx)
+{
+	KBASE_DEBUG_ASSERT(ctx);
+
+	return (atomic_read(&ctx->event_count) != 0) ||
+			(atomic_read(&ctx->event_closed) != 0);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_pending);
+
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent)
+{
+	struct kbase_jd_atom *atom;
+
+	KBASE_DEBUG_ASSERT(ctx);
+
+	mutex_lock(&ctx->event_mutex);
+
+	if (list_empty(&ctx->event_list)) {
+		if (!atomic_read(&ctx->event_closed)) {
+			mutex_unlock(&ctx->event_mutex);
+			return -1;
+		}
+
+		/* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */
+		mutex_unlock(&ctx->event_mutex);
+		uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED;
+		memset(&uevent->udata, 0, sizeof(uevent->udata));
+		dev_dbg(ctx->kbdev->dev,
+				"event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n",
+				BASE_JD_EVENT_DRV_TERMINATED);
+		return 0;
+	}
+
+	/* normal event processing */
+	atomic_dec(&ctx->event_count);
+	atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]);
+	list_del(ctx->event_list.next);
+
+	mutex_unlock(&ctx->event_mutex);
+
+	dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom);
+	uevent->event_code = atom->event_code;
+	uevent->atom_number = (atom - ctx->jctx.atoms);
+
+	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(atom);
+
+	mutex_lock(&ctx->jctx.lock);
+	uevent->udata = kbase_event_process(ctx, atom);
+	mutex_unlock(&ctx->jctx.lock);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_dequeue);
+
+/**
+ * kbase_event_process_noreport_worker - Worker for processing atoms that do not
+ *                                       return an event but do have external
+ *                                       resources
+ * @data:  Work structure
+ */
+static void kbase_event_process_noreport_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+		kbase_jd_free_external_resources(katom);
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_event_process(kctx, katom);
+	mutex_unlock(&kctx->jctx.lock);
+}
+
+/**
+ * kbase_event_process_noreport - Process atoms that do not return an event
+ * @kctx:  Context pointer
+ * @katom: Atom to be processed
+ *
+ * Atoms that do not have external resources will be processed immediately.
+ * Atoms that do have external resources will be processed on a workqueue, in
+ * order to avoid locking issues.
+ */
+static void kbase_event_process_noreport(struct kbase_context *kctx,
+		struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		INIT_WORK(&katom->work, kbase_event_process_noreport_worker);
+		queue_work(kctx->event_workq, &katom->work);
+	} else {
+		kbase_event_process(kctx, katom);
+	}
+}
+
+/**
+ * kbase_event_coalesce - Move pending events to the main event list
+ * @kctx:  Context pointer
+ *
+ * kctx->event_list and kctx->event_coalesce_count must be protected
+ * by a lock unless this is the last thread using them
+ * (and we're about to terminate the lock).
+ *
+ * Return: The number of pending events moved to the main event list
+ */
+static int kbase_event_coalesce(struct kbase_context *kctx)
+{
+	const int event_count = kctx->event_coalesce_count;
+
+	/* Join the list of pending events onto the tail of the main list
+	   and reset it */
+	list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list);
+	kctx->event_coalesce_count = 0;
+
+	/* Return the number of events moved */
+	return event_count;
+}
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
+{
+	if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
+		if (atom->event_code == BASE_JD_EVENT_DONE) {
+			/* Don't report the event */
+			kbase_event_process_noreport(ctx, atom);
+			return;
+		}
+	}
+
+	if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) {
+		/* Don't report the event */
+		kbase_event_process_noreport(ctx, atom);
+		return;
+	}
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_POSTED);
+	if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
+		/* Don't report the event until other event(s) have completed */
+		mutex_lock(&ctx->event_mutex);
+		list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list);
+		++ctx->event_coalesce_count;
+		mutex_unlock(&ctx->event_mutex);
+	} else {
+		/* Report the event and any pending events now */
+		int event_count = 1;
+
+		mutex_lock(&ctx->event_mutex);
+		event_count += kbase_event_coalesce(ctx);
+		list_add_tail(&atom->dep_item[0], &ctx->event_list);
+		atomic_add(event_count, &ctx->event_count);
+		mutex_unlock(&ctx->event_mutex);
+
+		kbase_event_wakeup(ctx);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_event_post);
+
+void kbase_event_close(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->event_mutex);
+	atomic_set(&kctx->event_closed, true);
+	mutex_unlock(&kctx->event_mutex);
+	kbase_event_wakeup(kctx);
+}
+
+int kbase_event_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	INIT_LIST_HEAD(&kctx->event_list);
+	INIT_LIST_HEAD(&kctx->event_coalesce_list);
+	mutex_init(&kctx->event_mutex);
+	atomic_set(&kctx->event_count, 0);
+	kctx->event_coalesce_count = 0;
+	atomic_set(&kctx->event_closed, false);
+	kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
+
+	if (NULL == kctx->event_workq)
+		return -EINVAL;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_init);
+
+void kbase_event_cleanup(struct kbase_context *kctx)
+{
+	int event_count;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(kctx->event_workq);
+
+	flush_workqueue(kctx->event_workq);
+	destroy_workqueue(kctx->event_workq);
+
+	/* We use kbase_event_dequeue to remove the remaining events as that
+	 * deals with all the cleanup needed for the atoms.
+	 *
+	 * Note: use of kctx->event_list without a lock is safe because this must be the last
+	 * thread using it (because we're about to terminate the lock)
+	 */
+	event_count = kbase_event_coalesce(kctx);
+	atomic_add(event_count, &kctx->event_count);
+
+	while (!list_empty(&kctx->event_list)) {
+		struct base_jd_event_v2 event;
+
+		kbase_event_dequeue(kctx, &event);
+	}
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_cleanup);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_fence.c b/drivers/gpu/arm/midgard/mali_kbase_fence.c
new file mode 100644
index 0000000000000..ac8272c900bba
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_fence.c
@@ -0,0 +1,206 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <mali_kbase_fence_defs.h>
+#include <mali_kbase_fence.h>
+#include <mali_kbase.h>
+
+/* Spin lock protecting all Mali fences as fence->lock. */
+static DEFINE_SPINLOCK(kbase_fence_lock);
+
+static const char *
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_get_driver_name(struct fence *fence)
+#else
+kbase_fence_get_driver_name(struct dma_fence *fence)
+#endif
+{
+	return kbase_drv_name;
+}
+
+static const char *
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_get_timeline_name(struct fence *fence)
+#else
+kbase_fence_get_timeline_name(struct dma_fence *fence)
+#endif
+{
+	return kbase_timeline_name;
+}
+
+static bool
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_enable_signaling(struct fence *fence)
+#else
+kbase_fence_enable_signaling(struct dma_fence *fence)
+#endif
+{
+	return true;
+}
+
+static void
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+kbase_fence_fence_value_str(struct fence *fence, char *str, int size)
+#else
+kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size)
+#endif
+{
+	snprintf(str, size, "%u", fence->seqno);
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+const struct fence_ops kbase_fence_ops = {
+	.wait = fence_default_wait,
+#else
+const struct dma_fence_ops kbase_fence_ops = {
+	.wait = dma_fence_default_wait,
+#endif
+	.get_driver_name = kbase_fence_get_driver_name,
+	.get_timeline_name = kbase_fence_get_timeline_name,
+	.enable_signaling = kbase_fence_enable_signaling,
+	.fence_value_str = kbase_fence_fence_value_str
+};
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+struct fence *
+kbase_fence_out_new(struct kbase_jd_atom *katom)
+#else
+struct dma_fence *
+kbase_fence_out_new(struct kbase_jd_atom *katom)
+#endif
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	WARN_ON(katom->dma_fence.fence);
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	dma_fence_init(fence,
+		       &kbase_fence_ops,
+		       &kbase_fence_lock,
+		       katom->dma_fence.context,
+		       atomic_inc_return(&katom->dma_fence.seqno));
+
+	katom->dma_fence.fence = fence;
+
+	return fence;
+}
+
+bool
+kbase_fence_free_callbacks(struct kbase_jd_atom *katom)
+{
+	struct kbase_fence_cb *cb, *tmp;
+	bool res = false;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	/* Clean up and free callbacks. */
+	list_for_each_entry_safe(cb, tmp, &katom->dma_fence.callbacks, node) {
+		bool ret;
+
+		/* Cancel callbacks that hasn't been called yet. */
+		ret = dma_fence_remove_callback(cb->fence, &cb->fence_cb);
+		if (ret) {
+			int ret;
+
+			/* Fence had not signaled, clean up after
+			 * canceling.
+			 */
+			ret = atomic_dec_return(&katom->dma_fence.dep_count);
+
+			if (unlikely(ret == 0))
+				res = true;
+		}
+
+		/*
+		 * Release the reference taken in
+		 * kbase_fence_add_callback().
+		 */
+		dma_fence_put(cb->fence);
+		list_del(&cb->node);
+		kfree(cb);
+	}
+
+	return res;
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+int
+kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			 struct fence *fence,
+			 fence_func_t callback)
+#else
+int
+kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			 struct dma_fence *fence,
+			 dma_fence_func_t callback)
+#endif
+{
+	int err = 0;
+	struct kbase_fence_cb *kbase_fence_cb;
+
+	if (!fence)
+		return -EINVAL;
+
+	kbase_fence_cb = kmalloc(sizeof(*kbase_fence_cb), GFP_KERNEL);
+	if (!kbase_fence_cb)
+		return -ENOMEM;
+
+	kbase_fence_cb->fence = fence;
+	kbase_fence_cb->katom = katom;
+	INIT_LIST_HEAD(&kbase_fence_cb->node);
+
+	err = dma_fence_add_callback(fence, &kbase_fence_cb->fence_cb,
+				     callback);
+	if (err == -ENOENT) {
+		/* Fence signaled, get the completion result */
+		err = dma_fence_get_status(fence);
+
+		/* remap success completion to err code */
+		if (err == 1)
+			err = 0;
+
+		kfree(kbase_fence_cb);
+	} else if (err) {
+		kfree(kbase_fence_cb);
+	} else {
+		/*
+		 * Get reference to fence that will be kept until callback gets
+		 * cleaned up in kbase_fence_free_callbacks().
+		 */
+		dma_fence_get(fence);
+		atomic_inc(&katom->dma_fence.dep_count);
+		/* Add callback to katom's list of callbacks */
+		list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks);
+	}
+
+	return err;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_fence.h b/drivers/gpu/arm/midgard/mali_kbase_fence.h
new file mode 100644
index 0000000000000..d7a65e0fb1c82
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_fence.h
@@ -0,0 +1,280 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_FENCE_H_
+#define _KBASE_FENCE_H_
+
+/*
+ * mali_kbase_fence.[hc] has common fence code used by both
+ * - CONFIG_MALI_DMA_FENCE - implicit DMA fences
+ * - CONFIG_SYNC_FILE      - explicit fences beginning with 4.9 kernel
+ */
+
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+
+#include <linux/list.h>
+#include "mali_kbase_fence_defs.h"
+#include "mali_kbase.h"
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+extern const struct fence_ops kbase_fence_ops;
+#else
+extern const struct dma_fence_ops kbase_fence_ops;
+#endif
+
+/**
+* struct kbase_fence_cb - Mali dma-fence callback data struct
+* @fence_cb: Callback function
+* @katom:    Pointer to katom that is waiting on this callback
+* @fence:    Pointer to the fence object on which this callback is waiting
+* @node:     List head for linking this callback to the katom
+*/
+struct kbase_fence_cb {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence_cb fence_cb;
+	struct fence *fence;
+#else
+	struct dma_fence_cb fence_cb;
+	struct dma_fence *fence;
+#endif
+	struct kbase_jd_atom *katom;
+	struct list_head node;
+};
+
+/**
+ * kbase_fence_out_new() - Creates a new output fence and puts it on the atom
+ * @katom: Atom to create an output fence for
+ *
+ * return: A new fence object on success, NULL on failure.
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom);
+#else
+struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom);
+#endif
+
+#if defined(CONFIG_SYNC_FILE)
+/**
+ * kbase_fence_fence_in_set() - Assign input fence to atom
+ * @katom: Atom to assign input fence to
+ * @fence: Input fence to assign to atom
+ *
+ * This function will take ownership of one fence reference!
+ */
+#define kbase_fence_fence_in_set(katom, fence) \
+	do { \
+		WARN_ON((katom)->dma_fence.fence_in); \
+		(katom)->dma_fence.fence_in = fence; \
+	} while (0)
+#endif
+
+/**
+ * kbase_fence_out_remove() - Removes the output fence from atom
+ * @katom: Atom to remove output fence for
+ *
+ * This will also release the reference to this fence which the atom keeps
+ */
+static inline void kbase_fence_out_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->dma_fence.fence) {
+		dma_fence_put(katom->dma_fence.fence);
+		katom->dma_fence.fence = NULL;
+	}
+}
+
+#if defined(CONFIG_SYNC_FILE)
+/**
+ * kbase_fence_out_remove() - Removes the input fence from atom
+ * @katom: Atom to remove input fence for
+ *
+ * This will also release the reference to this fence which the atom keeps
+ */
+static inline void kbase_fence_in_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->dma_fence.fence_in) {
+		dma_fence_put(katom->dma_fence.fence_in);
+		katom->dma_fence.fence_in = NULL;
+	}
+}
+#endif
+
+/**
+ * kbase_fence_out_is_ours() - Check if atom has a valid fence created by us
+ * @katom: Atom to check output fence for
+ *
+ * Return: true if fence exists and is valid, otherwise false
+ */
+static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom)
+{
+	return katom->dma_fence.fence &&
+				katom->dma_fence.fence->ops == &kbase_fence_ops;
+}
+
+/**
+ * kbase_fence_out_signal() - Signal output fence of atom
+ * @katom: Atom to signal output fence for
+ * @status: Status to signal with (0 for success, < 0 for error)
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom,
+					 int status)
+{
+	if (status) {
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
+	  KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)
+		fence_set_error(katom->dma_fence.fence, status);
+#elif (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE)
+		dma_fence_set_error(katom->dma_fence.fence, status);
+#else
+		katom->dma_fence.fence->status = status;
+#endif
+	}
+	return dma_fence_signal(katom->dma_fence.fence);
+}
+
+/**
+ * kbase_fence_add_callback() - Add callback on @fence to block @katom
+ * @katom: Pointer to katom that will be blocked by @fence
+ * @fence: Pointer to fence on which to set up the callback
+ * @callback: Pointer to function to be called when fence is signaled
+ *
+ * Caller needs to hold a reference to @fence when calling this function, and
+ * the caller is responsible for releasing that reference.  An additional
+ * reference to @fence will be taken when the callback was successfully set up
+ * and @fence needs to be kept valid until the callback has been called and
+ * cleanup have been done.
+ *
+ * Return: 0 on success: fence was either already signaled, or callback was
+ * set up. Negative error code is returned on error.
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+int kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			     struct fence *fence,
+			     fence_func_t callback);
+#else
+int kbase_fence_add_callback(struct kbase_jd_atom *katom,
+			     struct dma_fence *fence,
+			     dma_fence_func_t callback);
+#endif
+
+/**
+ * kbase_fence_dep_count_set() - Set dep_count value on atom to specified value
+ * @katom: Atom to set dep_count for
+ * @val: value to set dep_count to
+ *
+ * The dep_count is available to the users of this module so that they can
+ * synchronize completion of the wait with cancellation and adding of more
+ * callbacks. For instance, a user could do the following:
+ *
+ * dep_count set to 1
+ * callback #1 added, dep_count is increased to 2
+ *                             callback #1 happens, dep_count decremented to 1
+ *                             since dep_count > 0, no completion is done
+ * callback #2 is added, dep_count is increased to 2
+ * dep_count decremented to 1
+ *                             callback #2 happens, dep_count decremented to 0
+ *                             since dep_count now is zero, completion executes
+ *
+ * The dep_count can also be used to make sure that the completion only
+ * executes once. This is typically done by setting dep_count to -1 for the
+ * thread that takes on this responsibility.
+ */
+static inline void
+kbase_fence_dep_count_set(struct kbase_jd_atom *katom, int val)
+{
+	atomic_set(&katom->dma_fence.dep_count, val);
+}
+
+/**
+ * kbase_fence_dep_count_dec_and_test() - Decrements dep_count
+ * @katom: Atom to decrement dep_count for
+ *
+ * See @kbase_fence_dep_count_set for general description about dep_count
+ *
+ * Return: true if value was decremented to zero, otherwise false
+ */
+static inline bool
+kbase_fence_dep_count_dec_and_test(struct kbase_jd_atom *katom)
+{
+	return atomic_dec_and_test(&katom->dma_fence.dep_count);
+}
+
+/**
+ * kbase_fence_dep_count_read() - Returns the current dep_count value
+ * @katom: Pointer to katom
+ *
+ * See @kbase_fence_dep_count_set for general description about dep_count
+ *
+ * Return: The current dep_count value
+ */
+static inline int kbase_fence_dep_count_read(struct kbase_jd_atom *katom)
+{
+	return atomic_read(&katom->dma_fence.dep_count);
+}
+
+/**
+ * kbase_fence_free_callbacks() - Free dma-fence callbacks on a katom
+ * @katom: Pointer to katom
+ *
+ * This function will free all fence callbacks on the katom's list of
+ * callbacks. Callbacks that have not yet been called, because their fence
+ * hasn't yet signaled, will first be removed from the fence.
+ *
+ * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
+ *
+ * Return: true if dep_count reached 0, otherwise false.
+ */
+bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom);
+
+#if defined(CONFIG_SYNC_FILE)
+/**
+ * kbase_fence_in_get() - Retrieve input fence for atom.
+ * @katom: Atom to get input fence from
+ *
+ * A ref will be taken for the fence, so use @kbase_fence_put() to release it
+ *
+ * Return: The fence, or NULL if there is no input fence for atom
+ */
+#define kbase_fence_in_get(katom) dma_fence_get((katom)->dma_fence.fence_in)
+#endif
+
+/**
+ * kbase_fence_out_get() - Retrieve output fence for atom.
+ * @katom: Atom to get output fence from
+ *
+ * A ref will be taken for the fence, so use @kbase_fence_put() to release it
+ *
+ * Return: The fence, or NULL if there is no output fence for atom
+ */
+#define kbase_fence_out_get(katom) dma_fence_get((katom)->dma_fence.fence)
+
+/**
+ * kbase_fence_put() - Releases a reference to a fence
+ * @fence: Fence to release reference for.
+ */
+#define kbase_fence_put(fence) dma_fence_put(fence)
+
+
+#endif /* CONFIG_MALI_DMA_FENCE || defined(CONFIG_SYNC_FILE */
+
+#endif /* _KBASE_FENCE_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h b/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h
new file mode 100644
index 0000000000000..607a95c1b2ad8
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h
@@ -0,0 +1,68 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_FENCE_DEFS_H_
+#define _KBASE_FENCE_DEFS_H_
+
+/*
+ * There was a big rename in the 4.10 kernel (fence* -> dma_fence*)
+ * This file hides the compatibility issues with this for the rest the driver
+ */
+
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+
+#include <linux/version.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+
+#include <linux/fence.h>
+
+#define dma_fence_context_alloc(a) fence_context_alloc(a)
+#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e)
+#define dma_fence_get(a) fence_get(a)
+#define dma_fence_put(a) fence_put(a)
+#define dma_fence_signal(a) fence_signal(a)
+#define dma_fence_is_signaled(a) fence_is_signaled(a)
+#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c)
+#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b)
+
+#if (KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0)
+#else
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0)
+#endif
+
+#else
+
+#include <linux/dma-fence.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0))
+#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \
+	(a)->status ?: 1 \
+	: 0)
+#endif
+
+#endif /* < 4.10.0 */
+
+#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE */
+
+#endif /* _KBASE_FENCE_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator.h b/drivers/gpu/arm/midgard/mali_kbase_gator.h
new file mode 100644
index 0000000000000..4f54817210270
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* NB taken from gator  */
+/*
+ * List of possible actions to be controlled by DS-5 Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping
+ * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because
+ * they are unknown inside gator.
+ */
+#ifndef _KBASE_GATOR_H_
+#define _KBASE_GATOR_H_
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
+#define GATOR_JOB_SLOT_START 1
+#define GATOR_JOB_SLOT_STOP  2
+#define GATOR_JOB_SLOT_SOFT_STOPPED  3
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id);
+void kbase_trace_mali_pm_status(u32 event, u64 value);
+void kbase_trace_mali_pm_power_off(u32 event, u64 value);
+void kbase_trace_mali_pm_power_on(u32 event, u64 value);
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value);
+void kbase_trace_mali_mmu_as_in_use(int event);
+void kbase_trace_mali_mmu_as_released(int event);
+void kbase_trace_mali_total_alloc_pages_change(long long int event);
+
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+#endif  /* _KBASE_GATOR_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
new file mode 100644
index 0000000000000..7077c3a873228
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
@@ -0,0 +1,359 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+#include "mali_kbase_mem_linux.h"
+#include "mali_kbase_gator_api.h"
+#include "mali_kbase_gator_hwcnt_names.h"
+
+#define MALI_MAX_CORES_PER_GROUP		4
+#define MALI_MAX_NUM_BLOCKS_PER_GROUP	8
+#define MALI_COUNTERS_PER_BLOCK			64
+#define MALI_BYTES_PER_COUNTER			4
+
+struct kbase_gator_hwcnt_handles {
+	struct kbase_device *kbdev;
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	struct work_struct dump_work;
+	int dump_complete;
+	spinlock_t dump_lock;
+};
+
+static void dump_worker(struct work_struct *work);
+
+const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
+{
+	const char * const *hardware_counters;
+	struct kbase_device *kbdev;
+	uint32_t product_id;
+	uint32_t count;
+
+	if (!total_counters)
+		return NULL;
+
+	/* Get the first device - it doesn't matter in this case */
+	kbdev = kbase_find_device(-1);
+	if (!kbdev)
+		return NULL;
+
+	product_id = kbdev->gpu_props.props.core_props.product_id;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (GPU_ID2_MODEL_MATCH_VALUE(product_id)) {
+		case GPU_ID2_PRODUCT_TMIX:
+			hardware_counters = hardware_counters_mali_tMIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tMIx);
+			break;
+		case GPU_ID2_PRODUCT_THEX:
+			hardware_counters = hardware_counters_mali_tHEx;
+			count = ARRAY_SIZE(hardware_counters_mali_tHEx);
+			break;
+		case GPU_ID2_PRODUCT_TSIX:
+			hardware_counters = hardware_counters_mali_tSIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tSIx);
+			break;
+		case GPU_ID2_PRODUCT_TDVX:
+			hardware_counters = hardware_counters_mali_tSIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tSIx);
+			break;
+		case GPU_ID2_PRODUCT_TNOX:
+			hardware_counters = hardware_counters_mali_tNOx;
+			count = ARRAY_SIZE(hardware_counters_mali_tNOx);
+			break;
+		case GPU_ID2_PRODUCT_TGOX:
+			hardware_counters = hardware_counters_mali_tGOx;
+			count = ARRAY_SIZE(hardware_counters_mali_tGOx);
+			break;
+		case GPU_ID2_PRODUCT_TKAX:
+			hardware_counters = hardware_counters_mali_tKAx;
+			count = ARRAY_SIZE(hardware_counters_mali_tKAx);
+			break;
+		case GPU_ID2_PRODUCT_TTRX:
+			hardware_counters = hardware_counters_mali_tTRx;
+			count = ARRAY_SIZE(hardware_counters_mali_tTRx);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
+				product_id);
+			break;
+		}
+	} else {
+		switch (product_id) {
+			/* If we are using a Mali-T60x device */
+		case GPU_ID_PI_T60X:
+			hardware_counters = hardware_counters_mali_t60x;
+			count = ARRAY_SIZE(hardware_counters_mali_t60x);
+			break;
+			/* If we are using a Mali-T62x device */
+		case GPU_ID_PI_T62X:
+			hardware_counters = hardware_counters_mali_t62x;
+			count = ARRAY_SIZE(hardware_counters_mali_t62x);
+			break;
+			/* If we are using a Mali-T72x device */
+		case GPU_ID_PI_T72X:
+			hardware_counters = hardware_counters_mali_t72x;
+			count = ARRAY_SIZE(hardware_counters_mali_t72x);
+			break;
+			/* If we are using a Mali-T76x device */
+		case GPU_ID_PI_T76X:
+			hardware_counters = hardware_counters_mali_t76x;
+			count = ARRAY_SIZE(hardware_counters_mali_t76x);
+			break;
+			/* If we are using a Mali-T82x device */
+		case GPU_ID_PI_T82X:
+			hardware_counters = hardware_counters_mali_t82x;
+			count = ARRAY_SIZE(hardware_counters_mali_t82x);
+			break;
+			/* If we are using a Mali-T83x device */
+		case GPU_ID_PI_T83X:
+			hardware_counters = hardware_counters_mali_t83x;
+			count = ARRAY_SIZE(hardware_counters_mali_t83x);
+			break;
+			/* If we are using a Mali-T86x device */
+		case GPU_ID_PI_T86X:
+			hardware_counters = hardware_counters_mali_t86x;
+			count = ARRAY_SIZE(hardware_counters_mali_t86x);
+			break;
+			/* If we are using a Mali-T88x device */
+		case GPU_ID_PI_TFRX:
+			hardware_counters = hardware_counters_mali_t88x;
+			count = ARRAY_SIZE(hardware_counters_mali_t88x);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
+				product_id);
+			break;
+		}
+	}
+
+	/* Release the kbdev reference. */
+	kbase_release_device(kbdev);
+
+	*total_counters = count;
+
+	/* If we return a string array take a reference on the module (or fail). */
+	if (hardware_counters && !try_module_get(THIS_MODULE))
+		return NULL;
+
+	return hardware_counters;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names);
+
+void kbase_gator_hwcnt_term_names(void)
+{
+	/* Release the module reference. */
+	module_put(THIS_MODULE);
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names);
+
+struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+	struct kbase_ioctl_hwcnt_reader_setup setup;
+	uint32_t dump_size = 0, i = 0;
+
+	if (!in_out_info)
+		return NULL;
+
+	hand = kzalloc(sizeof(*hand), GFP_KERNEL);
+	if (!hand)
+		return NULL;
+
+	INIT_WORK(&hand->dump_work, dump_worker);
+	spin_lock_init(&hand->dump_lock);
+
+	/* Get the first device */
+	hand->kbdev = kbase_find_device(-1);
+	if (!hand->kbdev)
+		goto free_hand;
+
+	dump_size = kbase_vinstr_dump_size(hand->kbdev);
+	hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!hand->vinstr_buffer)
+		goto release_device;
+	in_out_info->kernel_dump_buffer = hand->vinstr_buffer;
+
+	in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
+	in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
+	in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;
+
+	/* If we are using a v4 device (Mali-T6xx or Mali-T72x) */
+	if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) {
+		uint32_t cg, j;
+		uint64_t core_mask;
+
+		/* There are 8 hardware counters blocks per core group */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			in_out_info->nr_core_groups, GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = in_out_info->nr_core_groups *
+			MALI_MAX_NUM_BLOCKS_PER_GROUP *
+			MALI_COUNTERS_PER_BLOCK *
+			MALI_BYTES_PER_COUNTER;
+
+		for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
+			core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;
+
+			for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
+				if (core_mask & (1u << j))
+					in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+				else
+					in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			}
+
+			in_out_info->hwc_layout[i++] = TILER_BLOCK;
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+			in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+
+			if (0 == cg)
+				in_out_info->hwc_layout[i++] = JM_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+		}
+	/* If we are using any other device */
+	} else {
+		uint32_t nr_l2, nr_sc_bits, j;
+		uint64_t core_mask;
+
+		nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
+
+		core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+
+		nr_sc_bits = fls64(core_mask);
+
+		/* The job manager and tiler sets of counters
+		 * are always present */
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);
+
+		if (!in_out_info->hwc_layout)
+			goto free_vinstr_buffer;
+
+		dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
+
+		in_out_info->hwc_layout[i++] = JM_BLOCK;
+		in_out_info->hwc_layout[i++] = TILER_BLOCK;
+
+		for (j = 0; j < nr_l2; j++)
+			in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+		while (core_mask != 0ull) {
+			if ((core_mask & 1ull) != 0ull)
+				in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+			else
+				in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+			core_mask >>= 1;
+		}
+	}
+
+	in_out_info->nr_hwc_blocks = i;
+	in_out_info->size = dump_size;
+
+	setup.jm_bm = in_out_info->bitmask[0];
+	setup.tiler_bm = in_out_info->bitmask[1];
+	setup.shader_bm = in_out_info->bitmask[2];
+	setup.mmu_l2_bm = in_out_info->bitmask[3];
+	hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx,
+			&setup, hand->vinstr_buffer);
+	if (!hand->vinstr_cli) {
+		dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core");
+		goto free_layout;
+	}
+
+	return hand;
+
+free_layout:
+	kfree(in_out_info->hwc_layout);
+
+free_vinstr_buffer:
+	kfree(hand->vinstr_buffer);
+
+release_device:
+	kbase_release_device(hand->kbdev);
+
+free_hand:
+	kfree(hand);
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init);
+
+void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (in_out_info)
+		kfree(in_out_info->hwc_layout);
+
+	if (opaque_handles) {
+		cancel_work_sync(&opaque_handles->dump_work);
+		kbase_vinstr_detach_client(opaque_handles->vinstr_cli);
+		kfree(opaque_handles->vinstr_buffer);
+		kbase_release_device(opaque_handles->kbdev);
+		kfree(opaque_handles);
+	}
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term);
+
+static void dump_worker(struct work_struct *work)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+
+	hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work);
+	if (!kbase_vinstr_hwc_dump(hand->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL)) {
+		spin_lock_bh(&hand->dump_lock);
+		hand->dump_complete = 1;
+		spin_unlock_bh(&hand->dump_lock);
+	} else {
+		schedule_work(&hand->dump_work);
+	}
+}
+
+uint32_t kbase_gator_instr_hwcnt_dump_complete(
+		struct kbase_gator_hwcnt_handles *opaque_handles,
+		uint32_t * const success)
+{
+
+	if (opaque_handles && success) {
+		*success = opaque_handles->dump_complete;
+		opaque_handles->dump_complete = 0;
+		return *success;
+	}
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete);
+
+uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+	if (opaque_handles)
+		schedule_work(&opaque_handles->dump_work);
+	return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.h b/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
new file mode 100644
index 0000000000000..bd0589ed6c1a0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
@@ -0,0 +1,224 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_GATOR_API_H_
+#define _KBASE_GATOR_API_H_
+
+/**
+ * @brief This file describes the API used by Gator to fetch hardware counters.
+ */
+
+/* This define is used by the gator kernel module compile to select which DDK
+ * API calling convention to use. If not defined (legacy DDK) gator assumes
+ * version 1. The version to DDK release mapping is:
+ *     Version 1 API: DDK versions r1px, r2px
+ *     Version 2 API: DDK versions r3px, r4px
+ *     Version 3 API: DDK version r5p0 and newer
+ *
+ * API Usage
+ * =========
+ *
+ * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter
+ * names for the GPU present in this device.
+ *
+ * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for
+ * the counters you want enabled. The enables can all be set for simplicity in
+ * most use cases, but disabling some will let you minimize bandwidth impact.
+ *
+ * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a
+ * counter context. On successful return the DDK will have populated the
+ * structure with a variety of useful information.
+ *
+ * 4] Call kbase_gator_hwcnt_dump_irq() to queue a non-blocking request for a
+ * counter dump. If this returns a non-zero value the request has been queued,
+ * otherwise the driver has been unable to do so (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 5] Call kbase_gator_hwcnt_dump_complete() to test whether the  previously
+ * requested dump has been succesful. If this returns non-zero the counter dump
+ * has resolved, but the value of *success must also be tested as the dump
+ * may have not been successful. If it returns zero the counter dump was
+ * abandoned due to the device being busy (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 6] Process the counters stored in the buffer pointed to by ...
+ *
+ *        kbase_gator_hwcnt_info->kernel_dump_buffer
+ *
+ *    In pseudo code you can find all of the counters via this approach:
+ *
+ *
+ *        hwcnt_info # pointer to kbase_gator_hwcnt_info structure
+ *        hwcnt_name # pointer to name list
+ *
+ *        u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer
+ *
+ *        # Iterate over each 64-counter block in this GPU configuration
+ *        for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
+ *            hwc_type type = hwcnt_info->hwc_layout[i];
+ *
+ *            # Skip reserved type blocks - they contain no counters at all
+ *            if( type == RESERVED_BLOCK ) {
+ *                continue;
+ *            }
+ *
+ *            size_t name_offset = type * 64;
+ *            size_t data_offset = i * 64;
+ *
+ *            # Iterate over the names of the counters in this block type
+ *            for( j = 0; j < 64; j++) {
+ *                const char * name = hwcnt_name[name_offset+j];
+ *
+ *                # Skip empty name strings - there is no counter here
+ *                if( name[0] == '\0' ) {
+ *                    continue;
+ *                }
+ *
+ *                u32 data = hwcnt_data[data_offset+j];
+ *
+ *                printk( "COUNTER: %s DATA: %u\n", name, data );
+ *            }
+ *        }
+ *
+ *
+ *     Note that in most implementations you typically want to either SUM or
+ *     AVERAGE multiple instances of the same counter if, for example, you have
+ *     multiple shader cores or multiple L2 caches. The most sensible view for
+ *     analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU
+ *     counters.
+ *
+ * 7] Goto 4, repeating until you want to stop collecting counters.
+ *
+ * 8] Release the dump resources by calling kbase_gator_hwcnt_term().
+ *
+ * 9] Release the name table resources by calling
+ *    kbase_gator_hwcnt_term_names(). This function must only be called if
+ *    init_names() returned a non-NULL value.
+ **/
+
+#define MALI_DDK_GATOR_API_VERSION 3
+
+enum hwc_type {
+	JM_BLOCK = 0,
+	TILER_BLOCK,
+	SHADER_BLOCK,
+	MMU_L2_BLOCK,
+	RESERVED_BLOCK
+};
+
+struct kbase_gator_hwcnt_info {
+	/* Passed from Gator to kbase */
+
+	/* the bitmask of enabled hardware counters for each counter block */
+	uint16_t bitmask[4];
+
+	/* Passed from kbase to Gator */
+
+	/* ptr to counter dump memory */
+	void *kernel_dump_buffer;
+
+	/* size of counter dump memory */
+	uint32_t size;
+
+	/* the ID of the Mali device */
+	uint32_t gpu_id;
+
+	/* the number of shader cores in the GPU */
+	uint32_t nr_cores;
+
+	/* the number of core groups */
+	uint32_t nr_core_groups;
+
+	/* the memory layout of the performance counters */
+	enum hwc_type *hwc_layout;
+
+	/* the total number of hardware couter blocks */
+	uint32_t nr_hwc_blocks;
+};
+
+/**
+ * @brief Opaque block of Mali data which Gator needs to return to the API later.
+ */
+struct kbase_gator_hwcnt_handles;
+
+/**
+ * @brief Initialize the resources Gator needs for performance profiling.
+ *
+ * @param in_out_info   A pointer to a structure containing the enabled counters passed from Gator and all the Mali
+ *                      specific information that will be returned to Gator. On entry Gator must have populated the
+ *                      'bitmask' field with the counters it wishes to enable for each class of counter block.
+ *                      Each entry in the array corresponds to a single counter class based on the "hwc_type"
+ *                      enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables
+ *                      the first 4 counters in the block, and so on). See the GPU counter array as returned by
+ *                      kbase_gator_hwcnt_get_names() for the index values of each counter for the curernt GPU.
+ *
+ * @return              Pointer to an opaque handle block on success, NULL on error.
+ */
+extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info);
+
+/**
+ * @brief Free all resources once Gator has finished using performance counters.
+ *
+ * @param in_out_info       A pointer to a structure containing the enabled counters passed from Gator and all the
+ *                          Mali specific information that will be returned to Gator.
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ */
+extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief Poll whether a counter dump is successful.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ * @param[out] success      Non-zero on success, zero on failure.
+ *
+ * @return                  Zero if the dump is still pending, non-zero if the dump has completed. Note that a
+ *                          completed dump may not have dumped succesfully, so the caller must test for both
+ *                          a completed and successful dump before processing counters.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success);
+
+/**
+ * @brief Request the generation of a new counter dump.
+ *
+ * @param opaque_handles    A wrapper structure for kbase structures.
+ *
+ * @return                  Zero if the hardware device is busy and cannot handle the request, non-zero otherwise.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief This function is used to fetch the names table based on the Mali device in use.
+ *
+ * @param[out] total_counters The total number of counters short names in the Mali devices' list.
+ *
+ * @return                    Pointer to an array of strings of length *total_counters.
+ */
+extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters);
+
+/**
+ * @brief This function is used to terminate the use of the names table.
+ *
+ * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value.
+ */
+extern void kbase_gator_hwcnt_term_names(void);
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
new file mode 100644
index 0000000000000..5d38c7b735531
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
@@ -0,0 +1,2178 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_H_
+#define _KBASE_GATOR_HWCNT_NAMES_H_
+
+/*
+ * "Short names" for hardware counters used by Streamline. Counters names are
+ * stored in accordance with their memory layout in the binary counter block
+ * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block
+ * of 64 counters, and each GPU implements the same set of "masters" although
+ * the counters each master exposes within its block of 64 may vary.
+ *
+ * Counters which are an empty string are simply "holes" in the counter memory
+ * where no counter exists.
+ */
+
+static const char * const hardware_counters_mali_t60x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MESSAGES_SENT",
+	"T60x_MESSAGES_RECEIVED",
+	"T60x_GPU_ACTIVE",
+	"T60x_IRQ_ACTIVE",
+	"T60x_JS0_JOBS",
+	"T60x_JS0_TASKS",
+	"T60x_JS0_ACTIVE",
+	"",
+	"T60x_JS0_WAIT_READ",
+	"T60x_JS0_WAIT_ISSUE",
+	"T60x_JS0_WAIT_DEPEND",
+	"T60x_JS0_WAIT_FINISH",
+	"T60x_JS1_JOBS",
+	"T60x_JS1_TASKS",
+	"T60x_JS1_ACTIVE",
+	"",
+	"T60x_JS1_WAIT_READ",
+	"T60x_JS1_WAIT_ISSUE",
+	"T60x_JS1_WAIT_DEPEND",
+	"T60x_JS1_WAIT_FINISH",
+	"T60x_JS2_JOBS",
+	"T60x_JS2_TASKS",
+	"T60x_JS2_ACTIVE",
+	"",
+	"T60x_JS2_WAIT_READ",
+	"T60x_JS2_WAIT_ISSUE",
+	"T60x_JS2_WAIT_DEPEND",
+	"T60x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T60x_TI_JOBS_PROCESSED",
+	"T60x_TI_TRIANGLES",
+	"T60x_TI_QUADS",
+	"T60x_TI_POLYGONS",
+	"T60x_TI_POINTS",
+	"T60x_TI_LINES",
+	"T60x_TI_VCACHE_HIT",
+	"T60x_TI_VCACHE_MISS",
+	"T60x_TI_FRONT_FACING",
+	"T60x_TI_BACK_FACING",
+	"T60x_TI_PRIM_VISIBLE",
+	"T60x_TI_PRIM_CULLED",
+	"T60x_TI_PRIM_CLIPPED",
+	"T60x_TI_LEVEL0",
+	"T60x_TI_LEVEL1",
+	"T60x_TI_LEVEL2",
+	"T60x_TI_LEVEL3",
+	"T60x_TI_LEVEL4",
+	"T60x_TI_LEVEL5",
+	"T60x_TI_LEVEL6",
+	"T60x_TI_LEVEL7",
+	"T60x_TI_COMMAND_1",
+	"T60x_TI_COMMAND_2",
+	"T60x_TI_COMMAND_3",
+	"T60x_TI_COMMAND_4",
+	"T60x_TI_COMMAND_4_7",
+	"T60x_TI_COMMAND_8_15",
+	"T60x_TI_COMMAND_16_63",
+	"T60x_TI_COMMAND_64",
+	"T60x_TI_COMPRESS_IN",
+	"T60x_TI_COMPRESS_OUT",
+	"T60x_TI_COMPRESS_FLUSH",
+	"T60x_TI_TIMESTAMPS",
+	"T60x_TI_PCACHE_HIT",
+	"T60x_TI_PCACHE_MISS",
+	"T60x_TI_PCACHE_LINE",
+	"T60x_TI_PCACHE_STALL",
+	"T60x_TI_WRBUF_HIT",
+	"T60x_TI_WRBUF_MISS",
+	"T60x_TI_WRBUF_LINE",
+	"T60x_TI_WRBUF_PARTIAL",
+	"T60x_TI_WRBUF_STALL",
+	"T60x_TI_ACTIVE",
+	"T60x_TI_LOADING_DESC",
+	"T60x_TI_INDEX_WAIT",
+	"T60x_TI_INDEX_RANGE_WAIT",
+	"T60x_TI_VERTEX_WAIT",
+	"T60x_TI_PCACHE_WAIT",
+	"T60x_TI_WRBUF_WAIT",
+	"T60x_TI_BUS_READ",
+	"T60x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_TI_UTLB_STALL",
+	"T60x_TI_UTLB_REPLAY_MISS",
+	"T60x_TI_UTLB_REPLAY_FULL",
+	"T60x_TI_UTLB_NEW_MISS",
+	"T60x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T60x_FRAG_ACTIVE",
+	"T60x_FRAG_PRIMITIVES",
+	"T60x_FRAG_PRIMITIVES_DROPPED",
+	"T60x_FRAG_CYCLES_DESC",
+	"T60x_FRAG_CYCLES_PLR",
+	"T60x_FRAG_CYCLES_VERT",
+	"T60x_FRAG_CYCLES_TRISETUP",
+	"T60x_FRAG_CYCLES_RAST",
+	"T60x_FRAG_THREADS",
+	"T60x_FRAG_DUMMY_THREADS",
+	"T60x_FRAG_QUADS_RAST",
+	"T60x_FRAG_QUADS_EZS_TEST",
+	"T60x_FRAG_QUADS_EZS_KILLED",
+	"T60x_FRAG_THREADS_LZS_TEST",
+	"T60x_FRAG_THREADS_LZS_KILLED",
+	"T60x_FRAG_CYCLES_NO_TILE",
+	"T60x_FRAG_NUM_TILES",
+	"T60x_FRAG_TRANS_ELIM",
+	"T60x_COMPUTE_ACTIVE",
+	"T60x_COMPUTE_TASKS",
+	"T60x_COMPUTE_THREADS",
+	"T60x_COMPUTE_CYCLES_DESC",
+	"T60x_TRIPIPE_ACTIVE",
+	"T60x_ARITH_WORDS",
+	"T60x_ARITH_CYCLES_REG",
+	"T60x_ARITH_CYCLES_L0",
+	"T60x_ARITH_FRAG_DEPEND",
+	"T60x_LS_WORDS",
+	"T60x_LS_ISSUES",
+	"T60x_LS_RESTARTS",
+	"T60x_LS_REISSUES_MISS",
+	"T60x_LS_REISSUES_VD",
+	"T60x_LS_REISSUE_ATTRIB_MISS",
+	"T60x_LS_NO_WB",
+	"T60x_TEX_WORDS",
+	"T60x_TEX_BUBBLES",
+	"T60x_TEX_WORDS_L0",
+	"T60x_TEX_WORDS_DESC",
+	"T60x_TEX_ISSUES",
+	"T60x_TEX_RECIRC_FMISS",
+	"T60x_TEX_RECIRC_DESC",
+	"T60x_TEX_RECIRC_MULTI",
+	"T60x_TEX_RECIRC_PMISS",
+	"T60x_TEX_RECIRC_CONF",
+	"T60x_LSC_READ_HITS",
+	"T60x_LSC_READ_MISSES",
+	"T60x_LSC_WRITE_HITS",
+	"T60x_LSC_WRITE_MISSES",
+	"T60x_LSC_ATOMIC_HITS",
+	"T60x_LSC_ATOMIC_MISSES",
+	"T60x_LSC_LINE_FETCHES",
+	"T60x_LSC_DIRTY_LINE",
+	"T60x_LSC_SNOOPS",
+	"T60x_AXI_TLB_STALL",
+	"T60x_AXI_TLB_MISS",
+	"T60x_AXI_TLB_TRANSACTION",
+	"T60x_LS_TLB_MISS",
+	"T60x_LS_TLB_HIT",
+	"T60x_AXI_BEATS_READ",
+	"T60x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T60x_MMU_HIT",
+	"T60x_MMU_NEW_MISS",
+	"T60x_MMU_REPLAY_FULL",
+	"T60x_MMU_REPLAY_MISS",
+	"T60x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_UTLB_HIT",
+	"T60x_UTLB_NEW_MISS",
+	"T60x_UTLB_REPLAY_FULL",
+	"T60x_UTLB_REPLAY_MISS",
+	"T60x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T60x_L2_EXT_WRITE_BEATS",
+	"T60x_L2_EXT_READ_BEATS",
+	"T60x_L2_ANY_LOOKUP",
+	"T60x_L2_READ_LOOKUP",
+	"T60x_L2_SREAD_LOOKUP",
+	"T60x_L2_READ_REPLAY",
+	"T60x_L2_READ_SNOOP",
+	"T60x_L2_READ_HIT",
+	"T60x_L2_CLEAN_MISS",
+	"T60x_L2_WRITE_LOOKUP",
+	"T60x_L2_SWRITE_LOOKUP",
+	"T60x_L2_WRITE_REPLAY",
+	"T60x_L2_WRITE_SNOOP",
+	"T60x_L2_WRITE_HIT",
+	"T60x_L2_EXT_READ_FULL",
+	"T60x_L2_EXT_READ_HALF",
+	"T60x_L2_EXT_WRITE_FULL",
+	"T60x_L2_EXT_WRITE_HALF",
+	"T60x_L2_EXT_READ",
+	"T60x_L2_EXT_READ_LINE",
+	"T60x_L2_EXT_WRITE",
+	"T60x_L2_EXT_WRITE_LINE",
+	"T60x_L2_EXT_WRITE_SMALL",
+	"T60x_L2_EXT_BARRIER",
+	"T60x_L2_EXT_AR_STALL",
+	"T60x_L2_EXT_R_BUF_FULL",
+	"T60x_L2_EXT_RD_BUF_FULL",
+	"T60x_L2_EXT_R_RAW",
+	"T60x_L2_EXT_W_STALL",
+	"T60x_L2_EXT_W_BUF_FULL",
+	"T60x_L2_EXT_R_W_HAZARD",
+	"T60x_L2_TAG_HAZARD",
+	"T60x_L2_SNOOP_FULL",
+	"T60x_L2_REPLAY_FULL"
+};
+static const char * const hardware_counters_mali_t62x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MESSAGES_SENT",
+	"T62x_MESSAGES_RECEIVED",
+	"T62x_GPU_ACTIVE",
+	"T62x_IRQ_ACTIVE",
+	"T62x_JS0_JOBS",
+	"T62x_JS0_TASKS",
+	"T62x_JS0_ACTIVE",
+	"",
+	"T62x_JS0_WAIT_READ",
+	"T62x_JS0_WAIT_ISSUE",
+	"T62x_JS0_WAIT_DEPEND",
+	"T62x_JS0_WAIT_FINISH",
+	"T62x_JS1_JOBS",
+	"T62x_JS1_TASKS",
+	"T62x_JS1_ACTIVE",
+	"",
+	"T62x_JS1_WAIT_READ",
+	"T62x_JS1_WAIT_ISSUE",
+	"T62x_JS1_WAIT_DEPEND",
+	"T62x_JS1_WAIT_FINISH",
+	"T62x_JS2_JOBS",
+	"T62x_JS2_TASKS",
+	"T62x_JS2_ACTIVE",
+	"",
+	"T62x_JS2_WAIT_READ",
+	"T62x_JS2_WAIT_ISSUE",
+	"T62x_JS2_WAIT_DEPEND",
+	"T62x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T62x_TI_JOBS_PROCESSED",
+	"T62x_TI_TRIANGLES",
+	"T62x_TI_QUADS",
+	"T62x_TI_POLYGONS",
+	"T62x_TI_POINTS",
+	"T62x_TI_LINES",
+	"T62x_TI_VCACHE_HIT",
+	"T62x_TI_VCACHE_MISS",
+	"T62x_TI_FRONT_FACING",
+	"T62x_TI_BACK_FACING",
+	"T62x_TI_PRIM_VISIBLE",
+	"T62x_TI_PRIM_CULLED",
+	"T62x_TI_PRIM_CLIPPED",
+	"T62x_TI_LEVEL0",
+	"T62x_TI_LEVEL1",
+	"T62x_TI_LEVEL2",
+	"T62x_TI_LEVEL3",
+	"T62x_TI_LEVEL4",
+	"T62x_TI_LEVEL5",
+	"T62x_TI_LEVEL6",
+	"T62x_TI_LEVEL7",
+	"T62x_TI_COMMAND_1",
+	"T62x_TI_COMMAND_2",
+	"T62x_TI_COMMAND_3",
+	"T62x_TI_COMMAND_4",
+	"T62x_TI_COMMAND_5_7",
+	"T62x_TI_COMMAND_8_15",
+	"T62x_TI_COMMAND_16_63",
+	"T62x_TI_COMMAND_64",
+	"T62x_TI_COMPRESS_IN",
+	"T62x_TI_COMPRESS_OUT",
+	"T62x_TI_COMPRESS_FLUSH",
+	"T62x_TI_TIMESTAMPS",
+	"T62x_TI_PCACHE_HIT",
+	"T62x_TI_PCACHE_MISS",
+	"T62x_TI_PCACHE_LINE",
+	"T62x_TI_PCACHE_STALL",
+	"T62x_TI_WRBUF_HIT",
+	"T62x_TI_WRBUF_MISS",
+	"T62x_TI_WRBUF_LINE",
+	"T62x_TI_WRBUF_PARTIAL",
+	"T62x_TI_WRBUF_STALL",
+	"T62x_TI_ACTIVE",
+	"T62x_TI_LOADING_DESC",
+	"T62x_TI_INDEX_WAIT",
+	"T62x_TI_INDEX_RANGE_WAIT",
+	"T62x_TI_VERTEX_WAIT",
+	"T62x_TI_PCACHE_WAIT",
+	"T62x_TI_WRBUF_WAIT",
+	"T62x_TI_BUS_READ",
+	"T62x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_TI_UTLB_STALL",
+	"T62x_TI_UTLB_REPLAY_MISS",
+	"T62x_TI_UTLB_REPLAY_FULL",
+	"T62x_TI_UTLB_NEW_MISS",
+	"T62x_TI_UTLB_HIT",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"T62x_SHADER_CORE_ACTIVE",
+	"T62x_FRAG_ACTIVE",
+	"T62x_FRAG_PRIMITIVES",
+	"T62x_FRAG_PRIMITIVES_DROPPED",
+	"T62x_FRAG_CYCLES_DESC",
+	"T62x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T62x_FRAG_CYCLES_VERT",
+	"T62x_FRAG_CYCLES_TRISETUP",
+	"T62x_FRAG_CYCLES_EZS_ACTIVE",
+	"T62x_FRAG_THREADS",
+	"T62x_FRAG_DUMMY_THREADS",
+	"T62x_FRAG_QUADS_RAST",
+	"T62x_FRAG_QUADS_EZS_TEST",
+	"T62x_FRAG_QUADS_EZS_KILLED",
+	"T62x_FRAG_THREADS_LZS_TEST",
+	"T62x_FRAG_THREADS_LZS_KILLED",
+	"T62x_FRAG_CYCLES_NO_TILE",
+	"T62x_FRAG_NUM_TILES",
+	"T62x_FRAG_TRANS_ELIM",
+	"T62x_COMPUTE_ACTIVE",
+	"T62x_COMPUTE_TASKS",
+	"T62x_COMPUTE_THREADS",
+	"T62x_COMPUTE_CYCLES_DESC",
+	"T62x_TRIPIPE_ACTIVE",
+	"T62x_ARITH_WORDS",
+	"T62x_ARITH_CYCLES_REG",
+	"T62x_ARITH_CYCLES_L0",
+	"T62x_ARITH_FRAG_DEPEND",
+	"T62x_LS_WORDS",
+	"T62x_LS_ISSUES",
+	"T62x_LS_RESTARTS",
+	"T62x_LS_REISSUES_MISS",
+	"T62x_LS_REISSUES_VD",
+	"T62x_LS_REISSUE_ATTRIB_MISS",
+	"T62x_LS_NO_WB",
+	"T62x_TEX_WORDS",
+	"T62x_TEX_BUBBLES",
+	"T62x_TEX_WORDS_L0",
+	"T62x_TEX_WORDS_DESC",
+	"T62x_TEX_ISSUES",
+	"T62x_TEX_RECIRC_FMISS",
+	"T62x_TEX_RECIRC_DESC",
+	"T62x_TEX_RECIRC_MULTI",
+	"T62x_TEX_RECIRC_PMISS",
+	"T62x_TEX_RECIRC_CONF",
+	"T62x_LSC_READ_HITS",
+	"T62x_LSC_READ_MISSES",
+	"T62x_LSC_WRITE_HITS",
+	"T62x_LSC_WRITE_MISSES",
+	"T62x_LSC_ATOMIC_HITS",
+	"T62x_LSC_ATOMIC_MISSES",
+	"T62x_LSC_LINE_FETCHES",
+	"T62x_LSC_DIRTY_LINE",
+	"T62x_LSC_SNOOPS",
+	"T62x_AXI_TLB_STALL",
+	"T62x_AXI_TLB_MISS",
+	"T62x_AXI_TLB_TRANSACTION",
+	"T62x_LS_TLB_MISS",
+	"T62x_LS_TLB_HIT",
+	"T62x_AXI_BEATS_READ",
+	"T62x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T62x_MMU_HIT",
+	"T62x_MMU_NEW_MISS",
+	"T62x_MMU_REPLAY_FULL",
+	"T62x_MMU_REPLAY_MISS",
+	"T62x_MMU_TABLE_WALK",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_UTLB_HIT",
+	"T62x_UTLB_NEW_MISS",
+	"T62x_UTLB_REPLAY_FULL",
+	"T62x_UTLB_REPLAY_MISS",
+	"T62x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T62x_L2_EXT_WRITE_BEATS",
+	"T62x_L2_EXT_READ_BEATS",
+	"T62x_L2_ANY_LOOKUP",
+	"T62x_L2_READ_LOOKUP",
+	"T62x_L2_SREAD_LOOKUP",
+	"T62x_L2_READ_REPLAY",
+	"T62x_L2_READ_SNOOP",
+	"T62x_L2_READ_HIT",
+	"T62x_L2_CLEAN_MISS",
+	"T62x_L2_WRITE_LOOKUP",
+	"T62x_L2_SWRITE_LOOKUP",
+	"T62x_L2_WRITE_REPLAY",
+	"T62x_L2_WRITE_SNOOP",
+	"T62x_L2_WRITE_HIT",
+	"T62x_L2_EXT_READ_FULL",
+	"T62x_L2_EXT_READ_HALF",
+	"T62x_L2_EXT_WRITE_FULL",
+	"T62x_L2_EXT_WRITE_HALF",
+	"T62x_L2_EXT_READ",
+	"T62x_L2_EXT_READ_LINE",
+	"T62x_L2_EXT_WRITE",
+	"T62x_L2_EXT_WRITE_LINE",
+	"T62x_L2_EXT_WRITE_SMALL",
+	"T62x_L2_EXT_BARRIER",
+	"T62x_L2_EXT_AR_STALL",
+	"T62x_L2_EXT_R_BUF_FULL",
+	"T62x_L2_EXT_RD_BUF_FULL",
+	"T62x_L2_EXT_R_RAW",
+	"T62x_L2_EXT_W_STALL",
+	"T62x_L2_EXT_W_BUF_FULL",
+	"T62x_L2_EXT_R_W_HAZARD",
+	"T62x_L2_TAG_HAZARD",
+	"T62x_L2_SNOOP_FULL",
+	"T62x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t72x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T72x_GPU_ACTIVE",
+	"T72x_IRQ_ACTIVE",
+	"T72x_JS0_JOBS",
+	"T72x_JS0_TASKS",
+	"T72x_JS0_ACTIVE",
+	"T72x_JS1_JOBS",
+	"T72x_JS1_TASKS",
+	"T72x_JS1_ACTIVE",
+	"T72x_JS2_JOBS",
+	"T72x_JS2_TASKS",
+	"T72x_JS2_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T72x_TI_JOBS_PROCESSED",
+	"T72x_TI_TRIANGLES",
+	"T72x_TI_QUADS",
+	"T72x_TI_POLYGONS",
+	"T72x_TI_POINTS",
+	"T72x_TI_LINES",
+	"T72x_TI_FRONT_FACING",
+	"T72x_TI_BACK_FACING",
+	"T72x_TI_PRIM_VISIBLE",
+	"T72x_TI_PRIM_CULLED",
+	"T72x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T72x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T72x_FRAG_ACTIVE",
+	"T72x_FRAG_PRIMITIVES",
+	"T72x_FRAG_PRIMITIVES_DROPPED",
+	"T72x_FRAG_THREADS",
+	"T72x_FRAG_DUMMY_THREADS",
+	"T72x_FRAG_QUADS_RAST",
+	"T72x_FRAG_QUADS_EZS_TEST",
+	"T72x_FRAG_QUADS_EZS_KILLED",
+	"T72x_FRAG_THREADS_LZS_TEST",
+	"T72x_FRAG_THREADS_LZS_KILLED",
+	"T72x_FRAG_CYCLES_NO_TILE",
+	"T72x_FRAG_NUM_TILES",
+	"T72x_FRAG_TRANS_ELIM",
+	"T72x_COMPUTE_ACTIVE",
+	"T72x_COMPUTE_TASKS",
+	"T72x_COMPUTE_THREADS",
+	"T72x_TRIPIPE_ACTIVE",
+	"T72x_ARITH_WORDS",
+	"T72x_ARITH_CYCLES_REG",
+	"T72x_LS_WORDS",
+	"T72x_LS_ISSUES",
+	"T72x_LS_RESTARTS",
+	"T72x_LS_REISSUES_MISS",
+	"T72x_TEX_WORDS",
+	"T72x_TEX_BUBBLES",
+	"T72x_TEX_ISSUES",
+	"T72x_LSC_READ_HITS",
+	"T72x_LSC_READ_MISSES",
+	"T72x_LSC_WRITE_HITS",
+	"T72x_LSC_WRITE_MISSES",
+	"T72x_LSC_ATOMIC_HITS",
+	"T72x_LSC_ATOMIC_MISSES",
+	"T72x_LSC_LINE_FETCHES",
+	"T72x_LSC_DIRTY_LINE",
+	"T72x_LSC_SNOOPS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T72x_L2_EXT_WRITE_BEAT",
+	"T72x_L2_EXT_READ_BEAT",
+	"T72x_L2_READ_SNOOP",
+	"T72x_L2_READ_HIT",
+	"T72x_L2_WRITE_SNOOP",
+	"T72x_L2_WRITE_HIT",
+	"T72x_L2_EXT_WRITE_SMALL",
+	"T72x_L2_EXT_BARRIER",
+	"T72x_L2_EXT_AR_STALL",
+	"T72x_L2_EXT_W_STALL",
+	"T72x_L2_SNOOP_FULL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	""
+};
+
+static const char * const hardware_counters_mali_t76x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MESSAGES_SENT",
+	"T76x_MESSAGES_RECEIVED",
+	"T76x_GPU_ACTIVE",
+	"T76x_IRQ_ACTIVE",
+	"T76x_JS0_JOBS",
+	"T76x_JS0_TASKS",
+	"T76x_JS0_ACTIVE",
+	"",
+	"T76x_JS0_WAIT_READ",
+	"T76x_JS0_WAIT_ISSUE",
+	"T76x_JS0_WAIT_DEPEND",
+	"T76x_JS0_WAIT_FINISH",
+	"T76x_JS1_JOBS",
+	"T76x_JS1_TASKS",
+	"T76x_JS1_ACTIVE",
+	"",
+	"T76x_JS1_WAIT_READ",
+	"T76x_JS1_WAIT_ISSUE",
+	"T76x_JS1_WAIT_DEPEND",
+	"T76x_JS1_WAIT_FINISH",
+	"T76x_JS2_JOBS",
+	"T76x_JS2_TASKS",
+	"T76x_JS2_ACTIVE",
+	"",
+	"T76x_JS2_WAIT_READ",
+	"T76x_JS2_WAIT_ISSUE",
+	"T76x_JS2_WAIT_DEPEND",
+	"T76x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T76x_TI_JOBS_PROCESSED",
+	"T76x_TI_TRIANGLES",
+	"T76x_TI_QUADS",
+	"T76x_TI_POLYGONS",
+	"T76x_TI_POINTS",
+	"T76x_TI_LINES",
+	"T76x_TI_VCACHE_HIT",
+	"T76x_TI_VCACHE_MISS",
+	"T76x_TI_FRONT_FACING",
+	"T76x_TI_BACK_FACING",
+	"T76x_TI_PRIM_VISIBLE",
+	"T76x_TI_PRIM_CULLED",
+	"T76x_TI_PRIM_CLIPPED",
+	"T76x_TI_LEVEL0",
+	"T76x_TI_LEVEL1",
+	"T76x_TI_LEVEL2",
+	"T76x_TI_LEVEL3",
+	"T76x_TI_LEVEL4",
+	"T76x_TI_LEVEL5",
+	"T76x_TI_LEVEL6",
+	"T76x_TI_LEVEL7",
+	"T76x_TI_COMMAND_1",
+	"T76x_TI_COMMAND_2",
+	"T76x_TI_COMMAND_3",
+	"T76x_TI_COMMAND_4",
+	"T76x_TI_COMMAND_5_7",
+	"T76x_TI_COMMAND_8_15",
+	"T76x_TI_COMMAND_16_63",
+	"T76x_TI_COMMAND_64",
+	"T76x_TI_COMPRESS_IN",
+	"T76x_TI_COMPRESS_OUT",
+	"T76x_TI_COMPRESS_FLUSH",
+	"T76x_TI_TIMESTAMPS",
+	"T76x_TI_PCACHE_HIT",
+	"T76x_TI_PCACHE_MISS",
+	"T76x_TI_PCACHE_LINE",
+	"T76x_TI_PCACHE_STALL",
+	"T76x_TI_WRBUF_HIT",
+	"T76x_TI_WRBUF_MISS",
+	"T76x_TI_WRBUF_LINE",
+	"T76x_TI_WRBUF_PARTIAL",
+	"T76x_TI_WRBUF_STALL",
+	"T76x_TI_ACTIVE",
+	"T76x_TI_LOADING_DESC",
+	"T76x_TI_INDEX_WAIT",
+	"T76x_TI_INDEX_RANGE_WAIT",
+	"T76x_TI_VERTEX_WAIT",
+	"T76x_TI_PCACHE_WAIT",
+	"T76x_TI_WRBUF_WAIT",
+	"T76x_TI_BUS_READ",
+	"T76x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_TI_UTLB_HIT",
+	"T76x_TI_UTLB_NEW_MISS",
+	"T76x_TI_UTLB_REPLAY_FULL",
+	"T76x_TI_UTLB_REPLAY_MISS",
+	"T76x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T76x_FRAG_ACTIVE",
+	"T76x_FRAG_PRIMITIVES",
+	"T76x_FRAG_PRIMITIVES_DROPPED",
+	"T76x_FRAG_CYCLES_DESC",
+	"T76x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T76x_FRAG_CYCLES_VERT",
+	"T76x_FRAG_CYCLES_TRISETUP",
+	"T76x_FRAG_CYCLES_EZS_ACTIVE",
+	"T76x_FRAG_THREADS",
+	"T76x_FRAG_DUMMY_THREADS",
+	"T76x_FRAG_QUADS_RAST",
+	"T76x_FRAG_QUADS_EZS_TEST",
+	"T76x_FRAG_QUADS_EZS_KILLED",
+	"T76x_FRAG_THREADS_LZS_TEST",
+	"T76x_FRAG_THREADS_LZS_KILLED",
+	"T76x_FRAG_CYCLES_NO_TILE",
+	"T76x_FRAG_NUM_TILES",
+	"T76x_FRAG_TRANS_ELIM",
+	"T76x_COMPUTE_ACTIVE",
+	"T76x_COMPUTE_TASKS",
+	"T76x_COMPUTE_THREADS",
+	"T76x_COMPUTE_CYCLES_DESC",
+	"T76x_TRIPIPE_ACTIVE",
+	"T76x_ARITH_WORDS",
+	"T76x_ARITH_CYCLES_REG",
+	"T76x_ARITH_CYCLES_L0",
+	"T76x_ARITH_FRAG_DEPEND",
+	"T76x_LS_WORDS",
+	"T76x_LS_ISSUES",
+	"T76x_LS_REISSUE_ATTR",
+	"T76x_LS_REISSUES_VARY",
+	"T76x_LS_VARY_RV_MISS",
+	"T76x_LS_VARY_RV_HIT",
+	"T76x_LS_NO_UNPARK",
+	"T76x_TEX_WORDS",
+	"T76x_TEX_BUBBLES",
+	"T76x_TEX_WORDS_L0",
+	"T76x_TEX_WORDS_DESC",
+	"T76x_TEX_ISSUES",
+	"T76x_TEX_RECIRC_FMISS",
+	"T76x_TEX_RECIRC_DESC",
+	"T76x_TEX_RECIRC_MULTI",
+	"T76x_TEX_RECIRC_PMISS",
+	"T76x_TEX_RECIRC_CONF",
+	"T76x_LSC_READ_HITS",
+	"T76x_LSC_READ_OP",
+	"T76x_LSC_WRITE_HITS",
+	"T76x_LSC_WRITE_OP",
+	"T76x_LSC_ATOMIC_HITS",
+	"T76x_LSC_ATOMIC_OP",
+	"T76x_LSC_LINE_FETCHES",
+	"T76x_LSC_DIRTY_LINE",
+	"T76x_LSC_SNOOPS",
+	"T76x_AXI_TLB_STALL",
+	"T76x_AXI_TLB_MISS",
+	"T76x_AXI_TLB_TRANSACTION",
+	"T76x_LS_TLB_MISS",
+	"T76x_LS_TLB_HIT",
+	"T76x_AXI_BEATS_READ",
+	"T76x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T76x_MMU_HIT",
+	"T76x_MMU_NEW_MISS",
+	"T76x_MMU_REPLAY_FULL",
+	"T76x_MMU_REPLAY_MISS",
+	"T76x_MMU_TABLE_WALK",
+	"T76x_MMU_REQUESTS",
+	"",
+	"",
+	"T76x_UTLB_HIT",
+	"T76x_UTLB_NEW_MISS",
+	"T76x_UTLB_REPLAY_FULL",
+	"T76x_UTLB_REPLAY_MISS",
+	"T76x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T76x_L2_EXT_WRITE_BEATS",
+	"T76x_L2_EXT_READ_BEATS",
+	"T76x_L2_ANY_LOOKUP",
+	"T76x_L2_READ_LOOKUP",
+	"T76x_L2_SREAD_LOOKUP",
+	"T76x_L2_READ_REPLAY",
+	"T76x_L2_READ_SNOOP",
+	"T76x_L2_READ_HIT",
+	"T76x_L2_CLEAN_MISS",
+	"T76x_L2_WRITE_LOOKUP",
+	"T76x_L2_SWRITE_LOOKUP",
+	"T76x_L2_WRITE_REPLAY",
+	"T76x_L2_WRITE_SNOOP",
+	"T76x_L2_WRITE_HIT",
+	"T76x_L2_EXT_READ_FULL",
+	"",
+	"T76x_L2_EXT_WRITE_FULL",
+	"T76x_L2_EXT_R_W_HAZARD",
+	"T76x_L2_EXT_READ",
+	"T76x_L2_EXT_READ_LINE",
+	"T76x_L2_EXT_WRITE",
+	"T76x_L2_EXT_WRITE_LINE",
+	"T76x_L2_EXT_WRITE_SMALL",
+	"T76x_L2_EXT_BARRIER",
+	"T76x_L2_EXT_AR_STALL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_EXT_RD_BUF_FULL",
+	"T76x_L2_EXT_R_RAW",
+	"T76x_L2_EXT_W_STALL",
+	"T76x_L2_EXT_W_BUF_FULL",
+	"T76x_L2_EXT_R_BUF_FULL",
+	"T76x_L2_TAG_HAZARD",
+	"T76x_L2_SNOOP_FULL",
+	"T76x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t82x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MESSAGES_SENT",
+	"T82x_MESSAGES_RECEIVED",
+	"T82x_GPU_ACTIVE",
+	"T82x_IRQ_ACTIVE",
+	"T82x_JS0_JOBS",
+	"T82x_JS0_TASKS",
+	"T82x_JS0_ACTIVE",
+	"",
+	"T82x_JS0_WAIT_READ",
+	"T82x_JS0_WAIT_ISSUE",
+	"T82x_JS0_WAIT_DEPEND",
+	"T82x_JS0_WAIT_FINISH",
+	"T82x_JS1_JOBS",
+	"T82x_JS1_TASKS",
+	"T82x_JS1_ACTIVE",
+	"",
+	"T82x_JS1_WAIT_READ",
+	"T82x_JS1_WAIT_ISSUE",
+	"T82x_JS1_WAIT_DEPEND",
+	"T82x_JS1_WAIT_FINISH",
+	"T82x_JS2_JOBS",
+	"T82x_JS2_TASKS",
+	"T82x_JS2_ACTIVE",
+	"",
+	"T82x_JS2_WAIT_READ",
+	"T82x_JS2_WAIT_ISSUE",
+	"T82x_JS2_WAIT_DEPEND",
+	"T82x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T82x_TI_JOBS_PROCESSED",
+	"T82x_TI_TRIANGLES",
+	"T82x_TI_QUADS",
+	"T82x_TI_POLYGONS",
+	"T82x_TI_POINTS",
+	"T82x_TI_LINES",
+	"T82x_TI_FRONT_FACING",
+	"T82x_TI_BACK_FACING",
+	"T82x_TI_PRIM_VISIBLE",
+	"T82x_TI_PRIM_CULLED",
+	"T82x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T82x_FRAG_ACTIVE",
+	"T82x_FRAG_PRIMITIVES",
+	"T82x_FRAG_PRIMITIVES_DROPPED",
+	"T82x_FRAG_CYCLES_DESC",
+	"T82x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T82x_FRAG_CYCLES_VERT",
+	"T82x_FRAG_CYCLES_TRISETUP",
+	"T82x_FRAG_CYCLES_EZS_ACTIVE",
+	"T82x_FRAG_THREADS",
+	"T82x_FRAG_DUMMY_THREADS",
+	"T82x_FRAG_QUADS_RAST",
+	"T82x_FRAG_QUADS_EZS_TEST",
+	"T82x_FRAG_QUADS_EZS_KILLED",
+	"T82x_FRAG_THREADS_LZS_TEST",
+	"T82x_FRAG_THREADS_LZS_KILLED",
+	"T82x_FRAG_CYCLES_NO_TILE",
+	"T82x_FRAG_NUM_TILES",
+	"T82x_FRAG_TRANS_ELIM",
+	"T82x_COMPUTE_ACTIVE",
+	"T82x_COMPUTE_TASKS",
+	"T82x_COMPUTE_THREADS",
+	"T82x_COMPUTE_CYCLES_DESC",
+	"T82x_TRIPIPE_ACTIVE",
+	"T82x_ARITH_WORDS",
+	"T82x_ARITH_CYCLES_REG",
+	"T82x_ARITH_CYCLES_L0",
+	"T82x_ARITH_FRAG_DEPEND",
+	"T82x_LS_WORDS",
+	"T82x_LS_ISSUES",
+	"T82x_LS_REISSUE_ATTR",
+	"T82x_LS_REISSUES_VARY",
+	"T82x_LS_VARY_RV_MISS",
+	"T82x_LS_VARY_RV_HIT",
+	"T82x_LS_NO_UNPARK",
+	"T82x_TEX_WORDS",
+	"T82x_TEX_BUBBLES",
+	"T82x_TEX_WORDS_L0",
+	"T82x_TEX_WORDS_DESC",
+	"T82x_TEX_ISSUES",
+	"T82x_TEX_RECIRC_FMISS",
+	"T82x_TEX_RECIRC_DESC",
+	"T82x_TEX_RECIRC_MULTI",
+	"T82x_TEX_RECIRC_PMISS",
+	"T82x_TEX_RECIRC_CONF",
+	"T82x_LSC_READ_HITS",
+	"T82x_LSC_READ_OP",
+	"T82x_LSC_WRITE_HITS",
+	"T82x_LSC_WRITE_OP",
+	"T82x_LSC_ATOMIC_HITS",
+	"T82x_LSC_ATOMIC_OP",
+	"T82x_LSC_LINE_FETCHES",
+	"T82x_LSC_DIRTY_LINE",
+	"T82x_LSC_SNOOPS",
+	"T82x_AXI_TLB_STALL",
+	"T82x_AXI_TLB_MISS",
+	"T82x_AXI_TLB_TRANSACTION",
+	"T82x_LS_TLB_MISS",
+	"T82x_LS_TLB_HIT",
+	"T82x_AXI_BEATS_READ",
+	"T82x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T82x_MMU_HIT",
+	"T82x_MMU_NEW_MISS",
+	"T82x_MMU_REPLAY_FULL",
+	"T82x_MMU_REPLAY_MISS",
+	"T82x_MMU_TABLE_WALK",
+	"T82x_MMU_REQUESTS",
+	"",
+	"",
+	"T82x_UTLB_HIT",
+	"T82x_UTLB_NEW_MISS",
+	"T82x_UTLB_REPLAY_FULL",
+	"T82x_UTLB_REPLAY_MISS",
+	"T82x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T82x_L2_EXT_WRITE_BEATS",
+	"T82x_L2_EXT_READ_BEATS",
+	"T82x_L2_ANY_LOOKUP",
+	"T82x_L2_READ_LOOKUP",
+	"T82x_L2_SREAD_LOOKUP",
+	"T82x_L2_READ_REPLAY",
+	"T82x_L2_READ_SNOOP",
+	"T82x_L2_READ_HIT",
+	"T82x_L2_CLEAN_MISS",
+	"T82x_L2_WRITE_LOOKUP",
+	"T82x_L2_SWRITE_LOOKUP",
+	"T82x_L2_WRITE_REPLAY",
+	"T82x_L2_WRITE_SNOOP",
+	"T82x_L2_WRITE_HIT",
+	"T82x_L2_EXT_READ_FULL",
+	"",
+	"T82x_L2_EXT_WRITE_FULL",
+	"T82x_L2_EXT_R_W_HAZARD",
+	"T82x_L2_EXT_READ",
+	"T82x_L2_EXT_READ_LINE",
+	"T82x_L2_EXT_WRITE",
+	"T82x_L2_EXT_WRITE_LINE",
+	"T82x_L2_EXT_WRITE_SMALL",
+	"T82x_L2_EXT_BARRIER",
+	"T82x_L2_EXT_AR_STALL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_EXT_RD_BUF_FULL",
+	"T82x_L2_EXT_R_RAW",
+	"T82x_L2_EXT_W_STALL",
+	"T82x_L2_EXT_W_BUF_FULL",
+	"T82x_L2_EXT_R_BUF_FULL",
+	"T82x_L2_TAG_HAZARD",
+	"T82x_L2_SNOOP_FULL",
+	"T82x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t83x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MESSAGES_SENT",
+	"T83x_MESSAGES_RECEIVED",
+	"T83x_GPU_ACTIVE",
+	"T83x_IRQ_ACTIVE",
+	"T83x_JS0_JOBS",
+	"T83x_JS0_TASKS",
+	"T83x_JS0_ACTIVE",
+	"",
+	"T83x_JS0_WAIT_READ",
+	"T83x_JS0_WAIT_ISSUE",
+	"T83x_JS0_WAIT_DEPEND",
+	"T83x_JS0_WAIT_FINISH",
+	"T83x_JS1_JOBS",
+	"T83x_JS1_TASKS",
+	"T83x_JS1_ACTIVE",
+	"",
+	"T83x_JS1_WAIT_READ",
+	"T83x_JS1_WAIT_ISSUE",
+	"T83x_JS1_WAIT_DEPEND",
+	"T83x_JS1_WAIT_FINISH",
+	"T83x_JS2_JOBS",
+	"T83x_JS2_TASKS",
+	"T83x_JS2_ACTIVE",
+	"",
+	"T83x_JS2_WAIT_READ",
+	"T83x_JS2_WAIT_ISSUE",
+	"T83x_JS2_WAIT_DEPEND",
+	"T83x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T83x_TI_JOBS_PROCESSED",
+	"T83x_TI_TRIANGLES",
+	"T83x_TI_QUADS",
+	"T83x_TI_POLYGONS",
+	"T83x_TI_POINTS",
+	"T83x_TI_LINES",
+	"T83x_TI_FRONT_FACING",
+	"T83x_TI_BACK_FACING",
+	"T83x_TI_PRIM_VISIBLE",
+	"T83x_TI_PRIM_CULLED",
+	"T83x_TI_PRIM_CLIPPED",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_TI_ACTIVE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T83x_FRAG_ACTIVE",
+	"T83x_FRAG_PRIMITIVES",
+	"T83x_FRAG_PRIMITIVES_DROPPED",
+	"T83x_FRAG_CYCLES_DESC",
+	"T83x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T83x_FRAG_CYCLES_VERT",
+	"T83x_FRAG_CYCLES_TRISETUP",
+	"T83x_FRAG_CYCLES_EZS_ACTIVE",
+	"T83x_FRAG_THREADS",
+	"T83x_FRAG_DUMMY_THREADS",
+	"T83x_FRAG_QUADS_RAST",
+	"T83x_FRAG_QUADS_EZS_TEST",
+	"T83x_FRAG_QUADS_EZS_KILLED",
+	"T83x_FRAG_THREADS_LZS_TEST",
+	"T83x_FRAG_THREADS_LZS_KILLED",
+	"T83x_FRAG_CYCLES_NO_TILE",
+	"T83x_FRAG_NUM_TILES",
+	"T83x_FRAG_TRANS_ELIM",
+	"T83x_COMPUTE_ACTIVE",
+	"T83x_COMPUTE_TASKS",
+	"T83x_COMPUTE_THREADS",
+	"T83x_COMPUTE_CYCLES_DESC",
+	"T83x_TRIPIPE_ACTIVE",
+	"T83x_ARITH_WORDS",
+	"T83x_ARITH_CYCLES_REG",
+	"T83x_ARITH_CYCLES_L0",
+	"T83x_ARITH_FRAG_DEPEND",
+	"T83x_LS_WORDS",
+	"T83x_LS_ISSUES",
+	"T83x_LS_REISSUE_ATTR",
+	"T83x_LS_REISSUES_VARY",
+	"T83x_LS_VARY_RV_MISS",
+	"T83x_LS_VARY_RV_HIT",
+	"T83x_LS_NO_UNPARK",
+	"T83x_TEX_WORDS",
+	"T83x_TEX_BUBBLES",
+	"T83x_TEX_WORDS_L0",
+	"T83x_TEX_WORDS_DESC",
+	"T83x_TEX_ISSUES",
+	"T83x_TEX_RECIRC_FMISS",
+	"T83x_TEX_RECIRC_DESC",
+	"T83x_TEX_RECIRC_MULTI",
+	"T83x_TEX_RECIRC_PMISS",
+	"T83x_TEX_RECIRC_CONF",
+	"T83x_LSC_READ_HITS",
+	"T83x_LSC_READ_OP",
+	"T83x_LSC_WRITE_HITS",
+	"T83x_LSC_WRITE_OP",
+	"T83x_LSC_ATOMIC_HITS",
+	"T83x_LSC_ATOMIC_OP",
+	"T83x_LSC_LINE_FETCHES",
+	"T83x_LSC_DIRTY_LINE",
+	"T83x_LSC_SNOOPS",
+	"T83x_AXI_TLB_STALL",
+	"T83x_AXI_TLB_MISS",
+	"T83x_AXI_TLB_TRANSACTION",
+	"T83x_LS_TLB_MISS",
+	"T83x_LS_TLB_HIT",
+	"T83x_AXI_BEATS_READ",
+	"T83x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T83x_MMU_HIT",
+	"T83x_MMU_NEW_MISS",
+	"T83x_MMU_REPLAY_FULL",
+	"T83x_MMU_REPLAY_MISS",
+	"T83x_MMU_TABLE_WALK",
+	"T83x_MMU_REQUESTS",
+	"",
+	"",
+	"T83x_UTLB_HIT",
+	"T83x_UTLB_NEW_MISS",
+	"T83x_UTLB_REPLAY_FULL",
+	"T83x_UTLB_REPLAY_MISS",
+	"T83x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T83x_L2_EXT_WRITE_BEATS",
+	"T83x_L2_EXT_READ_BEATS",
+	"T83x_L2_ANY_LOOKUP",
+	"T83x_L2_READ_LOOKUP",
+	"T83x_L2_SREAD_LOOKUP",
+	"T83x_L2_READ_REPLAY",
+	"T83x_L2_READ_SNOOP",
+	"T83x_L2_READ_HIT",
+	"T83x_L2_CLEAN_MISS",
+	"T83x_L2_WRITE_LOOKUP",
+	"T83x_L2_SWRITE_LOOKUP",
+	"T83x_L2_WRITE_REPLAY",
+	"T83x_L2_WRITE_SNOOP",
+	"T83x_L2_WRITE_HIT",
+	"T83x_L2_EXT_READ_FULL",
+	"",
+	"T83x_L2_EXT_WRITE_FULL",
+	"T83x_L2_EXT_R_W_HAZARD",
+	"T83x_L2_EXT_READ",
+	"T83x_L2_EXT_READ_LINE",
+	"T83x_L2_EXT_WRITE",
+	"T83x_L2_EXT_WRITE_LINE",
+	"T83x_L2_EXT_WRITE_SMALL",
+	"T83x_L2_EXT_BARRIER",
+	"T83x_L2_EXT_AR_STALL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_EXT_RD_BUF_FULL",
+	"T83x_L2_EXT_R_RAW",
+	"T83x_L2_EXT_W_STALL",
+	"T83x_L2_EXT_W_BUF_FULL",
+	"T83x_L2_EXT_R_BUF_FULL",
+	"T83x_L2_TAG_HAZARD",
+	"T83x_L2_SNOOP_FULL",
+	"T83x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t86x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MESSAGES_SENT",
+	"T86x_MESSAGES_RECEIVED",
+	"T86x_GPU_ACTIVE",
+	"T86x_IRQ_ACTIVE",
+	"T86x_JS0_JOBS",
+	"T86x_JS0_TASKS",
+	"T86x_JS0_ACTIVE",
+	"",
+	"T86x_JS0_WAIT_READ",
+	"T86x_JS0_WAIT_ISSUE",
+	"T86x_JS0_WAIT_DEPEND",
+	"T86x_JS0_WAIT_FINISH",
+	"T86x_JS1_JOBS",
+	"T86x_JS1_TASKS",
+	"T86x_JS1_ACTIVE",
+	"",
+	"T86x_JS1_WAIT_READ",
+	"T86x_JS1_WAIT_ISSUE",
+	"T86x_JS1_WAIT_DEPEND",
+	"T86x_JS1_WAIT_FINISH",
+	"T86x_JS2_JOBS",
+	"T86x_JS2_TASKS",
+	"T86x_JS2_ACTIVE",
+	"",
+	"T86x_JS2_WAIT_READ",
+	"T86x_JS2_WAIT_ISSUE",
+	"T86x_JS2_WAIT_DEPEND",
+	"T86x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T86x_TI_JOBS_PROCESSED",
+	"T86x_TI_TRIANGLES",
+	"T86x_TI_QUADS",
+	"T86x_TI_POLYGONS",
+	"T86x_TI_POINTS",
+	"T86x_TI_LINES",
+	"T86x_TI_VCACHE_HIT",
+	"T86x_TI_VCACHE_MISS",
+	"T86x_TI_FRONT_FACING",
+	"T86x_TI_BACK_FACING",
+	"T86x_TI_PRIM_VISIBLE",
+	"T86x_TI_PRIM_CULLED",
+	"T86x_TI_PRIM_CLIPPED",
+	"T86x_TI_LEVEL0",
+	"T86x_TI_LEVEL1",
+	"T86x_TI_LEVEL2",
+	"T86x_TI_LEVEL3",
+	"T86x_TI_LEVEL4",
+	"T86x_TI_LEVEL5",
+	"T86x_TI_LEVEL6",
+	"T86x_TI_LEVEL7",
+	"T86x_TI_COMMAND_1",
+	"T86x_TI_COMMAND_2",
+	"T86x_TI_COMMAND_3",
+	"T86x_TI_COMMAND_4",
+	"T86x_TI_COMMAND_5_7",
+	"T86x_TI_COMMAND_8_15",
+	"T86x_TI_COMMAND_16_63",
+	"T86x_TI_COMMAND_64",
+	"T86x_TI_COMPRESS_IN",
+	"T86x_TI_COMPRESS_OUT",
+	"T86x_TI_COMPRESS_FLUSH",
+	"T86x_TI_TIMESTAMPS",
+	"T86x_TI_PCACHE_HIT",
+	"T86x_TI_PCACHE_MISS",
+	"T86x_TI_PCACHE_LINE",
+	"T86x_TI_PCACHE_STALL",
+	"T86x_TI_WRBUF_HIT",
+	"T86x_TI_WRBUF_MISS",
+	"T86x_TI_WRBUF_LINE",
+	"T86x_TI_WRBUF_PARTIAL",
+	"T86x_TI_WRBUF_STALL",
+	"T86x_TI_ACTIVE",
+	"T86x_TI_LOADING_DESC",
+	"T86x_TI_INDEX_WAIT",
+	"T86x_TI_INDEX_RANGE_WAIT",
+	"T86x_TI_VERTEX_WAIT",
+	"T86x_TI_PCACHE_WAIT",
+	"T86x_TI_WRBUF_WAIT",
+	"T86x_TI_BUS_READ",
+	"T86x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_TI_UTLB_HIT",
+	"T86x_TI_UTLB_NEW_MISS",
+	"T86x_TI_UTLB_REPLAY_FULL",
+	"T86x_TI_UTLB_REPLAY_MISS",
+	"T86x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T86x_FRAG_ACTIVE",
+	"T86x_FRAG_PRIMITIVES",
+	"T86x_FRAG_PRIMITIVES_DROPPED",
+	"T86x_FRAG_CYCLES_DESC",
+	"T86x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T86x_FRAG_CYCLES_VERT",
+	"T86x_FRAG_CYCLES_TRISETUP",
+	"T86x_FRAG_CYCLES_EZS_ACTIVE",
+	"T86x_FRAG_THREADS",
+	"T86x_FRAG_DUMMY_THREADS",
+	"T86x_FRAG_QUADS_RAST",
+	"T86x_FRAG_QUADS_EZS_TEST",
+	"T86x_FRAG_QUADS_EZS_KILLED",
+	"T86x_FRAG_THREADS_LZS_TEST",
+	"T86x_FRAG_THREADS_LZS_KILLED",
+	"T86x_FRAG_CYCLES_NO_TILE",
+	"T86x_FRAG_NUM_TILES",
+	"T86x_FRAG_TRANS_ELIM",
+	"T86x_COMPUTE_ACTIVE",
+	"T86x_COMPUTE_TASKS",
+	"T86x_COMPUTE_THREADS",
+	"T86x_COMPUTE_CYCLES_DESC",
+	"T86x_TRIPIPE_ACTIVE",
+	"T86x_ARITH_WORDS",
+	"T86x_ARITH_CYCLES_REG",
+	"T86x_ARITH_CYCLES_L0",
+	"T86x_ARITH_FRAG_DEPEND",
+	"T86x_LS_WORDS",
+	"T86x_LS_ISSUES",
+	"T86x_LS_REISSUE_ATTR",
+	"T86x_LS_REISSUES_VARY",
+	"T86x_LS_VARY_RV_MISS",
+	"T86x_LS_VARY_RV_HIT",
+	"T86x_LS_NO_UNPARK",
+	"T86x_TEX_WORDS",
+	"T86x_TEX_BUBBLES",
+	"T86x_TEX_WORDS_L0",
+	"T86x_TEX_WORDS_DESC",
+	"T86x_TEX_ISSUES",
+	"T86x_TEX_RECIRC_FMISS",
+	"T86x_TEX_RECIRC_DESC",
+	"T86x_TEX_RECIRC_MULTI",
+	"T86x_TEX_RECIRC_PMISS",
+	"T86x_TEX_RECIRC_CONF",
+	"T86x_LSC_READ_HITS",
+	"T86x_LSC_READ_OP",
+	"T86x_LSC_WRITE_HITS",
+	"T86x_LSC_WRITE_OP",
+	"T86x_LSC_ATOMIC_HITS",
+	"T86x_LSC_ATOMIC_OP",
+	"T86x_LSC_LINE_FETCHES",
+	"T86x_LSC_DIRTY_LINE",
+	"T86x_LSC_SNOOPS",
+	"T86x_AXI_TLB_STALL",
+	"T86x_AXI_TLB_MISS",
+	"T86x_AXI_TLB_TRANSACTION",
+	"T86x_LS_TLB_MISS",
+	"T86x_LS_TLB_HIT",
+	"T86x_AXI_BEATS_READ",
+	"T86x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T86x_MMU_HIT",
+	"T86x_MMU_NEW_MISS",
+	"T86x_MMU_REPLAY_FULL",
+	"T86x_MMU_REPLAY_MISS",
+	"T86x_MMU_TABLE_WALK",
+	"T86x_MMU_REQUESTS",
+	"",
+	"",
+	"T86x_UTLB_HIT",
+	"T86x_UTLB_NEW_MISS",
+	"T86x_UTLB_REPLAY_FULL",
+	"T86x_UTLB_REPLAY_MISS",
+	"T86x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T86x_L2_EXT_WRITE_BEATS",
+	"T86x_L2_EXT_READ_BEATS",
+	"T86x_L2_ANY_LOOKUP",
+	"T86x_L2_READ_LOOKUP",
+	"T86x_L2_SREAD_LOOKUP",
+	"T86x_L2_READ_REPLAY",
+	"T86x_L2_READ_SNOOP",
+	"T86x_L2_READ_HIT",
+	"T86x_L2_CLEAN_MISS",
+	"T86x_L2_WRITE_LOOKUP",
+	"T86x_L2_SWRITE_LOOKUP",
+	"T86x_L2_WRITE_REPLAY",
+	"T86x_L2_WRITE_SNOOP",
+	"T86x_L2_WRITE_HIT",
+	"T86x_L2_EXT_READ_FULL",
+	"",
+	"T86x_L2_EXT_WRITE_FULL",
+	"T86x_L2_EXT_R_W_HAZARD",
+	"T86x_L2_EXT_READ",
+	"T86x_L2_EXT_READ_LINE",
+	"T86x_L2_EXT_WRITE",
+	"T86x_L2_EXT_WRITE_LINE",
+	"T86x_L2_EXT_WRITE_SMALL",
+	"T86x_L2_EXT_BARRIER",
+	"T86x_L2_EXT_AR_STALL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_EXT_RD_BUF_FULL",
+	"T86x_L2_EXT_R_RAW",
+	"T86x_L2_EXT_W_STALL",
+	"T86x_L2_EXT_W_BUF_FULL",
+	"T86x_L2_EXT_R_BUF_FULL",
+	"T86x_L2_TAG_HAZARD",
+	"T86x_L2_SNOOP_FULL",
+	"T86x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t88x[] = {
+	/* Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MESSAGES_SENT",
+	"T88x_MESSAGES_RECEIVED",
+	"T88x_GPU_ACTIVE",
+	"T88x_IRQ_ACTIVE",
+	"T88x_JS0_JOBS",
+	"T88x_JS0_TASKS",
+	"T88x_JS0_ACTIVE",
+	"",
+	"T88x_JS0_WAIT_READ",
+	"T88x_JS0_WAIT_ISSUE",
+	"T88x_JS0_WAIT_DEPEND",
+	"T88x_JS0_WAIT_FINISH",
+	"T88x_JS1_JOBS",
+	"T88x_JS1_TASKS",
+	"T88x_JS1_ACTIVE",
+	"",
+	"T88x_JS1_WAIT_READ",
+	"T88x_JS1_WAIT_ISSUE",
+	"T88x_JS1_WAIT_DEPEND",
+	"T88x_JS1_WAIT_FINISH",
+	"T88x_JS2_JOBS",
+	"T88x_JS2_TASKS",
+	"T88x_JS2_ACTIVE",
+	"",
+	"T88x_JS2_WAIT_READ",
+	"T88x_JS2_WAIT_ISSUE",
+	"T88x_JS2_WAIT_DEPEND",
+	"T88x_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/*Tiler */
+	"",
+	"",
+	"",
+	"T88x_TI_JOBS_PROCESSED",
+	"T88x_TI_TRIANGLES",
+	"T88x_TI_QUADS",
+	"T88x_TI_POLYGONS",
+	"T88x_TI_POINTS",
+	"T88x_TI_LINES",
+	"T88x_TI_VCACHE_HIT",
+	"T88x_TI_VCACHE_MISS",
+	"T88x_TI_FRONT_FACING",
+	"T88x_TI_BACK_FACING",
+	"T88x_TI_PRIM_VISIBLE",
+	"T88x_TI_PRIM_CULLED",
+	"T88x_TI_PRIM_CLIPPED",
+	"T88x_TI_LEVEL0",
+	"T88x_TI_LEVEL1",
+	"T88x_TI_LEVEL2",
+	"T88x_TI_LEVEL3",
+	"T88x_TI_LEVEL4",
+	"T88x_TI_LEVEL5",
+	"T88x_TI_LEVEL6",
+	"T88x_TI_LEVEL7",
+	"T88x_TI_COMMAND_1",
+	"T88x_TI_COMMAND_2",
+	"T88x_TI_COMMAND_3",
+	"T88x_TI_COMMAND_4",
+	"T88x_TI_COMMAND_5_7",
+	"T88x_TI_COMMAND_8_15",
+	"T88x_TI_COMMAND_16_63",
+	"T88x_TI_COMMAND_64",
+	"T88x_TI_COMPRESS_IN",
+	"T88x_TI_COMPRESS_OUT",
+	"T88x_TI_COMPRESS_FLUSH",
+	"T88x_TI_TIMESTAMPS",
+	"T88x_TI_PCACHE_HIT",
+	"T88x_TI_PCACHE_MISS",
+	"T88x_TI_PCACHE_LINE",
+	"T88x_TI_PCACHE_STALL",
+	"T88x_TI_WRBUF_HIT",
+	"T88x_TI_WRBUF_MISS",
+	"T88x_TI_WRBUF_LINE",
+	"T88x_TI_WRBUF_PARTIAL",
+	"T88x_TI_WRBUF_STALL",
+	"T88x_TI_ACTIVE",
+	"T88x_TI_LOADING_DESC",
+	"T88x_TI_INDEX_WAIT",
+	"T88x_TI_INDEX_RANGE_WAIT",
+	"T88x_TI_VERTEX_WAIT",
+	"T88x_TI_PCACHE_WAIT",
+	"T88x_TI_WRBUF_WAIT",
+	"T88x_TI_BUS_READ",
+	"T88x_TI_BUS_WRITE",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_TI_UTLB_HIT",
+	"T88x_TI_UTLB_NEW_MISS",
+	"T88x_TI_UTLB_REPLAY_FULL",
+	"T88x_TI_UTLB_REPLAY_MISS",
+	"T88x_TI_UTLB_STALL",
+
+	/* Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"T88x_FRAG_ACTIVE",
+	"T88x_FRAG_PRIMITIVES",
+	"T88x_FRAG_PRIMITIVES_DROPPED",
+	"T88x_FRAG_CYCLES_DESC",
+	"T88x_FRAG_CYCLES_FPKQ_ACTIVE",
+	"T88x_FRAG_CYCLES_VERT",
+	"T88x_FRAG_CYCLES_TRISETUP",
+	"T88x_FRAG_CYCLES_EZS_ACTIVE",
+	"T88x_FRAG_THREADS",
+	"T88x_FRAG_DUMMY_THREADS",
+	"T88x_FRAG_QUADS_RAST",
+	"T88x_FRAG_QUADS_EZS_TEST",
+	"T88x_FRAG_QUADS_EZS_KILLED",
+	"T88x_FRAG_THREADS_LZS_TEST",
+	"T88x_FRAG_THREADS_LZS_KILLED",
+	"T88x_FRAG_CYCLES_NO_TILE",
+	"T88x_FRAG_NUM_TILES",
+	"T88x_FRAG_TRANS_ELIM",
+	"T88x_COMPUTE_ACTIVE",
+	"T88x_COMPUTE_TASKS",
+	"T88x_COMPUTE_THREADS",
+	"T88x_COMPUTE_CYCLES_DESC",
+	"T88x_TRIPIPE_ACTIVE",
+	"T88x_ARITH_WORDS",
+	"T88x_ARITH_CYCLES_REG",
+	"T88x_ARITH_CYCLES_L0",
+	"T88x_ARITH_FRAG_DEPEND",
+	"T88x_LS_WORDS",
+	"T88x_LS_ISSUES",
+	"T88x_LS_REISSUE_ATTR",
+	"T88x_LS_REISSUES_VARY",
+	"T88x_LS_VARY_RV_MISS",
+	"T88x_LS_VARY_RV_HIT",
+	"T88x_LS_NO_UNPARK",
+	"T88x_TEX_WORDS",
+	"T88x_TEX_BUBBLES",
+	"T88x_TEX_WORDS_L0",
+	"T88x_TEX_WORDS_DESC",
+	"T88x_TEX_ISSUES",
+	"T88x_TEX_RECIRC_FMISS",
+	"T88x_TEX_RECIRC_DESC",
+	"T88x_TEX_RECIRC_MULTI",
+	"T88x_TEX_RECIRC_PMISS",
+	"T88x_TEX_RECIRC_CONF",
+	"T88x_LSC_READ_HITS",
+	"T88x_LSC_READ_OP",
+	"T88x_LSC_WRITE_HITS",
+	"T88x_LSC_WRITE_OP",
+	"T88x_LSC_ATOMIC_HITS",
+	"T88x_LSC_ATOMIC_OP",
+	"T88x_LSC_LINE_FETCHES",
+	"T88x_LSC_DIRTY_LINE",
+	"T88x_LSC_SNOOPS",
+	"T88x_AXI_TLB_STALL",
+	"T88x_AXI_TLB_MISS",
+	"T88x_AXI_TLB_TRANSACTION",
+	"T88x_LS_TLB_MISS",
+	"T88x_LS_TLB_HIT",
+	"T88x_AXI_BEATS_READ",
+	"T88x_AXI_BEATS_WRITTEN",
+
+	/*L2 and MMU */
+	"",
+	"",
+	"",
+	"",
+	"T88x_MMU_HIT",
+	"T88x_MMU_NEW_MISS",
+	"T88x_MMU_REPLAY_FULL",
+	"T88x_MMU_REPLAY_MISS",
+	"T88x_MMU_TABLE_WALK",
+	"T88x_MMU_REQUESTS",
+	"",
+	"",
+	"T88x_UTLB_HIT",
+	"T88x_UTLB_NEW_MISS",
+	"T88x_UTLB_REPLAY_FULL",
+	"T88x_UTLB_REPLAY_MISS",
+	"T88x_UTLB_STALL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"T88x_L2_EXT_WRITE_BEATS",
+	"T88x_L2_EXT_READ_BEATS",
+	"T88x_L2_ANY_LOOKUP",
+	"T88x_L2_READ_LOOKUP",
+	"T88x_L2_SREAD_LOOKUP",
+	"T88x_L2_READ_REPLAY",
+	"T88x_L2_READ_SNOOP",
+	"T88x_L2_READ_HIT",
+	"T88x_L2_CLEAN_MISS",
+	"T88x_L2_WRITE_LOOKUP",
+	"T88x_L2_SWRITE_LOOKUP",
+	"T88x_L2_WRITE_REPLAY",
+	"T88x_L2_WRITE_SNOOP",
+	"T88x_L2_WRITE_HIT",
+	"T88x_L2_EXT_READ_FULL",
+	"",
+	"T88x_L2_EXT_WRITE_FULL",
+	"T88x_L2_EXT_R_W_HAZARD",
+	"T88x_L2_EXT_READ",
+	"T88x_L2_EXT_READ_LINE",
+	"T88x_L2_EXT_WRITE",
+	"T88x_L2_EXT_WRITE_LINE",
+	"T88x_L2_EXT_WRITE_SMALL",
+	"T88x_L2_EXT_BARRIER",
+	"T88x_L2_EXT_AR_STALL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_EXT_RD_BUF_FULL",
+	"T88x_L2_EXT_R_RAW",
+	"T88x_L2_EXT_W_STALL",
+	"T88x_L2_EXT_W_BUF_FULL",
+	"T88x_L2_EXT_R_BUF_FULL",
+	"T88x_L2_TAG_HAZARD",
+	"T88x_L2_SNOOP_FULL",
+	"T88x_L2_REPLAY_FULL"
+};
+
+#include "mali_kbase_gator_hwcnt_names_tmix.h"
+
+#include "mali_kbase_gator_hwcnt_names_thex.h"
+
+#include "mali_kbase_gator_hwcnt_names_tsix.h"
+
+#include "mali_kbase_gator_hwcnt_names_tnox.h"
+
+#include "mali_kbase_gator_hwcnt_names_tgox.h"
+
+#include "mali_kbase_gator_hwcnt_names_tkax.h"
+
+#include "mali_kbase_gator_hwcnt_names_ttrx.h"
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tgox.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tgox.h
new file mode 100644
index 0000000000000..72b5266622a9d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tgox.h
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TGOX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TGOX_H_
+
+static const char * const hardware_counters_mali_tGOx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TGOx_MESSAGES_SENT",
+	"TGOx_MESSAGES_RECEIVED",
+	"TGOx_GPU_ACTIVE",
+	"TGOx_IRQ_ACTIVE",
+	"TGOx_JS0_JOBS",
+	"TGOx_JS0_TASKS",
+	"TGOx_JS0_ACTIVE",
+	"",
+	"TGOx_JS0_WAIT_READ",
+	"TGOx_JS0_WAIT_ISSUE",
+	"TGOx_JS0_WAIT_DEPEND",
+	"TGOx_JS0_WAIT_FINISH",
+	"TGOx_JS1_JOBS",
+	"TGOx_JS1_TASKS",
+	"TGOx_JS1_ACTIVE",
+	"",
+	"TGOx_JS1_WAIT_READ",
+	"TGOx_JS1_WAIT_ISSUE",
+	"TGOx_JS1_WAIT_DEPEND",
+	"TGOx_JS1_WAIT_FINISH",
+	"TGOx_JS2_JOBS",
+	"TGOx_JS2_TASKS",
+	"TGOx_JS2_ACTIVE",
+	"",
+	"TGOx_JS2_WAIT_READ",
+	"TGOx_JS2_WAIT_ISSUE",
+	"TGOx_JS2_WAIT_DEPEND",
+	"TGOx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TGOx_TILER_ACTIVE",
+	"TGOx_JOBS_PROCESSED",
+	"TGOx_TRIANGLES",
+	"TGOx_LINES",
+	"TGOx_POINTS",
+	"TGOx_FRONT_FACING",
+	"TGOx_BACK_FACING",
+	"TGOx_PRIM_VISIBLE",
+	"TGOx_PRIM_CULLED",
+	"TGOx_PRIM_CLIPPED",
+	"TGOx_PRIM_SAT_CULLED",
+	"TGOx_BIN_ALLOC_INIT",
+	"TGOx_BIN_ALLOC_OVERFLOW",
+	"TGOx_BUS_READ",
+	"",
+	"TGOx_BUS_WRITE",
+	"TGOx_LOADING_DESC",
+	"TGOx_IDVS_POS_SHAD_REQ",
+	"TGOx_IDVS_POS_SHAD_WAIT",
+	"TGOx_IDVS_POS_SHAD_STALL",
+	"TGOx_IDVS_POS_FIFO_FULL",
+	"TGOx_PREFETCH_STALL",
+	"TGOx_VCACHE_HIT",
+	"TGOx_VCACHE_MISS",
+	"TGOx_VCACHE_LINE_WAIT",
+	"TGOx_VFETCH_POS_READ_WAIT",
+	"TGOx_VFETCH_VERTEX_WAIT",
+	"TGOx_VFETCH_STALL",
+	"TGOx_PRIMASSY_STALL",
+	"TGOx_BBOX_GEN_STALL",
+	"TGOx_IDVS_VBU_HIT",
+	"TGOx_IDVS_VBU_MISS",
+	"TGOx_IDVS_VBU_LINE_DEALLOCATE",
+	"TGOx_IDVS_VAR_SHAD_REQ",
+	"TGOx_IDVS_VAR_SHAD_STALL",
+	"TGOx_BINNER_STALL",
+	"TGOx_ITER_STALL",
+	"TGOx_COMPRESS_MISS",
+	"TGOx_COMPRESS_STALL",
+	"TGOx_PCACHE_HIT",
+	"TGOx_PCACHE_MISS",
+	"TGOx_PCACHE_MISS_STALL",
+	"TGOx_PCACHE_EVICT_STALL",
+	"TGOx_PMGR_PTR_WR_STALL",
+	"TGOx_PMGR_PTR_RD_STALL",
+	"TGOx_PMGR_CMD_WR_STALL",
+	"TGOx_WRBUF_ACTIVE",
+	"TGOx_WRBUF_HIT",
+	"TGOx_WRBUF_MISS",
+	"TGOx_WRBUF_NO_FREE_LINE_STALL",
+	"TGOx_WRBUF_NO_AXI_ID_STALL",
+	"TGOx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TGOx_UTLB_TRANS",
+	"TGOx_UTLB_TRANS_HIT",
+	"TGOx_UTLB_TRANS_STALL",
+	"TGOx_UTLB_TRANS_MISS_DELAY",
+	"TGOx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TGOx_FRAG_ACTIVE",
+	"TGOx_FRAG_PRIMITIVES",
+	"TGOx_FRAG_PRIM_RAST",
+	"TGOx_FRAG_FPK_ACTIVE",
+	"TGOx_FRAG_STARVING",
+	"TGOx_FRAG_WARPS",
+	"TGOx_FRAG_PARTIAL_WARPS",
+	"TGOx_FRAG_QUADS_RAST",
+	"TGOx_FRAG_QUADS_EZS_TEST",
+	"TGOx_FRAG_QUADS_EZS_UPDATE",
+	"TGOx_FRAG_QUADS_EZS_KILL",
+	"TGOx_FRAG_LZS_TEST",
+	"TGOx_FRAG_LZS_KILL",
+	"TGOx_WARP_REG_SIZE_64",
+	"TGOx_FRAG_PTILES",
+	"TGOx_FRAG_TRANS_ELIM",
+	"TGOx_QUAD_FPK_KILLER",
+	"TGOx_FULL_QUAD_WARPS",
+	"TGOx_COMPUTE_ACTIVE",
+	"TGOx_COMPUTE_TASKS",
+	"TGOx_COMPUTE_WARPS",
+	"TGOx_COMPUTE_STARVING",
+	"TGOx_EXEC_CORE_ACTIVE",
+	"TGOx_EXEC_ACTIVE",
+	"TGOx_EXEC_INSTR_COUNT",
+	"TGOx_EXEC_INSTR_DIVERGED",
+	"TGOx_EXEC_INSTR_STARVING",
+	"TGOx_ARITH_INSTR_SINGLE_FMA",
+	"TGOx_ARITH_INSTR_DOUBLE",
+	"TGOx_ARITH_INSTR_MSG",
+	"TGOx_ARITH_INSTR_MSG_ONLY",
+	"TGOx_TEX_MSGI_NUM_QUADS",
+	"TGOx_TEX_DFCH_NUM_PASSES",
+	"TGOx_TEX_DFCH_NUM_PASSES_MISS",
+	"TGOx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+	"TGOx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+	"TGOx_TEX_TFCH_NUM_LINES_FETCHED",
+	"TGOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+	"TGOx_TEX_TFCH_NUM_OPERATIONS",
+	"TGOx_TEX_FILT_NUM_OPERATIONS",
+	"TGOx_LS_MEM_READ_FULL",
+	"TGOx_LS_MEM_READ_SHORT",
+	"TGOx_LS_MEM_WRITE_FULL",
+	"TGOx_LS_MEM_WRITE_SHORT",
+	"TGOx_LS_MEM_ATOMIC",
+	"TGOx_VARY_INSTR",
+	"TGOx_VARY_SLOT_32",
+	"TGOx_VARY_SLOT_16",
+	"TGOx_ATTR_INSTR",
+	"TGOx_ARITH_INSTR_FP_MUL",
+	"TGOx_BEATS_RD_FTC",
+	"TGOx_BEATS_RD_FTC_EXT",
+	"TGOx_BEATS_RD_LSC",
+	"TGOx_BEATS_RD_LSC_EXT",
+	"TGOx_BEATS_RD_TEX",
+	"TGOx_BEATS_RD_TEX_EXT",
+	"TGOx_BEATS_RD_OTHER",
+	"TGOx_BEATS_WR_LSC_WB",
+	"TGOx_BEATS_WR_TIB",
+	"TGOx_BEATS_WR_LSC_OTHER",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TGOx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TGOx_L2_RD_MSG_IN",
+	"TGOx_L2_RD_MSG_IN_STALL",
+	"TGOx_L2_WR_MSG_IN",
+	"TGOx_L2_WR_MSG_IN_STALL",
+	"TGOx_L2_SNP_MSG_IN",
+	"TGOx_L2_SNP_MSG_IN_STALL",
+	"TGOx_L2_RD_MSG_OUT",
+	"TGOx_L2_RD_MSG_OUT_STALL",
+	"TGOx_L2_WR_MSG_OUT",
+	"TGOx_L2_ANY_LOOKUP",
+	"TGOx_L2_READ_LOOKUP",
+	"TGOx_L2_WRITE_LOOKUP",
+	"TGOx_L2_EXT_SNOOP_LOOKUP",
+	"TGOx_L2_EXT_READ",
+	"TGOx_L2_EXT_READ_NOSNP",
+	"TGOx_L2_EXT_READ_UNIQUE",
+	"TGOx_L2_EXT_READ_BEATS",
+	"TGOx_L2_EXT_AR_STALL",
+	"TGOx_L2_EXT_AR_CNT_Q1",
+	"TGOx_L2_EXT_AR_CNT_Q2",
+	"TGOx_L2_EXT_AR_CNT_Q3",
+	"TGOx_L2_EXT_RRESP_0_127",
+	"TGOx_L2_EXT_RRESP_128_191",
+	"TGOx_L2_EXT_RRESP_192_255",
+	"TGOx_L2_EXT_RRESP_256_319",
+	"TGOx_L2_EXT_RRESP_320_383",
+	"TGOx_L2_EXT_WRITE",
+	"TGOx_L2_EXT_WRITE_NOSNP_FULL",
+	"TGOx_L2_EXT_WRITE_NOSNP_PTL",
+	"TGOx_L2_EXT_WRITE_SNP_FULL",
+	"TGOx_L2_EXT_WRITE_SNP_PTL",
+	"TGOx_L2_EXT_WRITE_BEATS",
+	"TGOx_L2_EXT_W_STALL",
+	"TGOx_L2_EXT_AW_CNT_Q1",
+	"TGOx_L2_EXT_AW_CNT_Q2",
+	"TGOx_L2_EXT_AW_CNT_Q3",
+	"TGOx_L2_EXT_SNOOP",
+	"TGOx_L2_EXT_SNOOP_STALL",
+	"TGOx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TGOx_L2_EXT_SNOOP_RESP_DATA",
+	"TGOx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TGOX_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
new file mode 100644
index 0000000000000..e24e91ab1ca4c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+
+static const char * const hardware_counters_mali_tHEx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"THEx_MESSAGES_SENT",
+	"THEx_MESSAGES_RECEIVED",
+	"THEx_GPU_ACTIVE",
+	"THEx_IRQ_ACTIVE",
+	"THEx_JS0_JOBS",
+	"THEx_JS0_TASKS",
+	"THEx_JS0_ACTIVE",
+	"",
+	"THEx_JS0_WAIT_READ",
+	"THEx_JS0_WAIT_ISSUE",
+	"THEx_JS0_WAIT_DEPEND",
+	"THEx_JS0_WAIT_FINISH",
+	"THEx_JS1_JOBS",
+	"THEx_JS1_TASKS",
+	"THEx_JS1_ACTIVE",
+	"",
+	"THEx_JS1_WAIT_READ",
+	"THEx_JS1_WAIT_ISSUE",
+	"THEx_JS1_WAIT_DEPEND",
+	"THEx_JS1_WAIT_FINISH",
+	"THEx_JS2_JOBS",
+	"THEx_JS2_TASKS",
+	"THEx_JS2_ACTIVE",
+	"",
+	"THEx_JS2_WAIT_READ",
+	"THEx_JS2_WAIT_ISSUE",
+	"THEx_JS2_WAIT_DEPEND",
+	"THEx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"THEx_TILER_ACTIVE",
+	"THEx_JOBS_PROCESSED",
+	"THEx_TRIANGLES",
+	"THEx_LINES",
+	"THEx_POINTS",
+	"THEx_FRONT_FACING",
+	"THEx_BACK_FACING",
+	"THEx_PRIM_VISIBLE",
+	"THEx_PRIM_CULLED",
+	"THEx_PRIM_CLIPPED",
+	"THEx_PRIM_SAT_CULLED",
+	"THEx_BIN_ALLOC_INIT",
+	"THEx_BIN_ALLOC_OVERFLOW",
+	"THEx_BUS_READ",
+	"",
+	"THEx_BUS_WRITE",
+	"THEx_LOADING_DESC",
+	"THEx_IDVS_POS_SHAD_REQ",
+	"THEx_IDVS_POS_SHAD_WAIT",
+	"THEx_IDVS_POS_SHAD_STALL",
+	"THEx_IDVS_POS_FIFO_FULL",
+	"THEx_PREFETCH_STALL",
+	"THEx_VCACHE_HIT",
+	"THEx_VCACHE_MISS",
+	"THEx_VCACHE_LINE_WAIT",
+	"THEx_VFETCH_POS_READ_WAIT",
+	"THEx_VFETCH_VERTEX_WAIT",
+	"THEx_VFETCH_STALL",
+	"THEx_PRIMASSY_STALL",
+	"THEx_BBOX_GEN_STALL",
+	"THEx_IDVS_VBU_HIT",
+	"THEx_IDVS_VBU_MISS",
+	"THEx_IDVS_VBU_LINE_DEALLOCATE",
+	"THEx_IDVS_VAR_SHAD_REQ",
+	"THEx_IDVS_VAR_SHAD_STALL",
+	"THEx_BINNER_STALL",
+	"THEx_ITER_STALL",
+	"THEx_COMPRESS_MISS",
+	"THEx_COMPRESS_STALL",
+	"THEx_PCACHE_HIT",
+	"THEx_PCACHE_MISS",
+	"THEx_PCACHE_MISS_STALL",
+	"THEx_PCACHE_EVICT_STALL",
+	"THEx_PMGR_PTR_WR_STALL",
+	"THEx_PMGR_PTR_RD_STALL",
+	"THEx_PMGR_CMD_WR_STALL",
+	"THEx_WRBUF_ACTIVE",
+	"THEx_WRBUF_HIT",
+	"THEx_WRBUF_MISS",
+	"THEx_WRBUF_NO_FREE_LINE_STALL",
+	"THEx_WRBUF_NO_AXI_ID_STALL",
+	"THEx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"THEx_UTLB_TRANS",
+	"THEx_UTLB_TRANS_HIT",
+	"THEx_UTLB_TRANS_STALL",
+	"THEx_UTLB_TRANS_MISS_DELAY",
+	"THEx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"THEx_FRAG_ACTIVE",
+	"THEx_FRAG_PRIMITIVES",
+	"THEx_FRAG_PRIM_RAST",
+	"THEx_FRAG_FPK_ACTIVE",
+	"THEx_FRAG_STARVING",
+	"THEx_FRAG_WARPS",
+	"THEx_FRAG_PARTIAL_WARPS",
+	"THEx_FRAG_QUADS_RAST",
+	"THEx_FRAG_QUADS_EZS_TEST",
+	"THEx_FRAG_QUADS_EZS_UPDATE",
+	"THEx_FRAG_QUADS_EZS_KILL",
+	"THEx_FRAG_LZS_TEST",
+	"THEx_FRAG_LZS_KILL",
+	"",
+	"THEx_FRAG_PTILES",
+	"THEx_FRAG_TRANS_ELIM",
+	"THEx_QUAD_FPK_KILLER",
+	"",
+	"THEx_COMPUTE_ACTIVE",
+	"THEx_COMPUTE_TASKS",
+	"THEx_COMPUTE_WARPS",
+	"THEx_COMPUTE_STARVING",
+	"THEx_EXEC_CORE_ACTIVE",
+	"THEx_EXEC_ACTIVE",
+	"THEx_EXEC_INSTR_COUNT",
+	"THEx_EXEC_INSTR_DIVERGED",
+	"THEx_EXEC_INSTR_STARVING",
+	"THEx_ARITH_INSTR_SINGLE_FMA",
+	"THEx_ARITH_INSTR_DOUBLE",
+	"THEx_ARITH_INSTR_MSG",
+	"THEx_ARITH_INSTR_MSG_ONLY",
+	"THEx_TEX_INSTR",
+	"THEx_TEX_INSTR_MIPMAP",
+	"THEx_TEX_INSTR_COMPRESSED",
+	"THEx_TEX_INSTR_3D",
+	"THEx_TEX_INSTR_TRILINEAR",
+	"THEx_TEX_COORD_ISSUE",
+	"THEx_TEX_COORD_STALL",
+	"THEx_TEX_STARVE_CACHE",
+	"THEx_TEX_STARVE_FILTER",
+	"THEx_LS_MEM_READ_FULL",
+	"THEx_LS_MEM_READ_SHORT",
+	"THEx_LS_MEM_WRITE_FULL",
+	"THEx_LS_MEM_WRITE_SHORT",
+	"THEx_LS_MEM_ATOMIC",
+	"THEx_VARY_INSTR",
+	"THEx_VARY_SLOT_32",
+	"THEx_VARY_SLOT_16",
+	"THEx_ATTR_INSTR",
+	"THEx_ARITH_INSTR_FP_MUL",
+	"THEx_BEATS_RD_FTC",
+	"THEx_BEATS_RD_FTC_EXT",
+	"THEx_BEATS_RD_LSC",
+	"THEx_BEATS_RD_LSC_EXT",
+	"THEx_BEATS_RD_TEX",
+	"THEx_BEATS_RD_TEX_EXT",
+	"THEx_BEATS_RD_OTHER",
+	"THEx_BEATS_WR_LSC",
+	"THEx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"THEx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"THEx_L2_RD_MSG_IN",
+	"THEx_L2_RD_MSG_IN_STALL",
+	"THEx_L2_WR_MSG_IN",
+	"THEx_L2_WR_MSG_IN_STALL",
+	"THEx_L2_SNP_MSG_IN",
+	"THEx_L2_SNP_MSG_IN_STALL",
+	"THEx_L2_RD_MSG_OUT",
+	"THEx_L2_RD_MSG_OUT_STALL",
+	"THEx_L2_WR_MSG_OUT",
+	"THEx_L2_ANY_LOOKUP",
+	"THEx_L2_READ_LOOKUP",
+	"THEx_L2_WRITE_LOOKUP",
+	"THEx_L2_EXT_SNOOP_LOOKUP",
+	"THEx_L2_EXT_READ",
+	"THEx_L2_EXT_READ_NOSNP",
+	"THEx_L2_EXT_READ_UNIQUE",
+	"THEx_L2_EXT_READ_BEATS",
+	"THEx_L2_EXT_AR_STALL",
+	"THEx_L2_EXT_AR_CNT_Q1",
+	"THEx_L2_EXT_AR_CNT_Q2",
+	"THEx_L2_EXT_AR_CNT_Q3",
+	"THEx_L2_EXT_RRESP_0_127",
+	"THEx_L2_EXT_RRESP_128_191",
+	"THEx_L2_EXT_RRESP_192_255",
+	"THEx_L2_EXT_RRESP_256_319",
+	"THEx_L2_EXT_RRESP_320_383",
+	"THEx_L2_EXT_WRITE",
+	"THEx_L2_EXT_WRITE_NOSNP_FULL",
+	"THEx_L2_EXT_WRITE_NOSNP_PTL",
+	"THEx_L2_EXT_WRITE_SNP_FULL",
+	"THEx_L2_EXT_WRITE_SNP_PTL",
+	"THEx_L2_EXT_WRITE_BEATS",
+	"THEx_L2_EXT_W_STALL",
+	"THEx_L2_EXT_AW_CNT_Q1",
+	"THEx_L2_EXT_AW_CNT_Q2",
+	"THEx_L2_EXT_AW_CNT_Q3",
+	"THEx_L2_EXT_SNOOP",
+	"THEx_L2_EXT_SNOOP_STALL",
+	"THEx_L2_EXT_SNOOP_RESP_CLEAN",
+	"THEx_L2_EXT_SNOOP_RESP_DATA",
+	"THEx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tkax.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tkax.h
new file mode 100644
index 0000000000000..73db45c232f17
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tkax.h
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TKAX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TKAX_H_
+
+static const char * const hardware_counters_mali_tKAx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TKAx_MESSAGES_SENT",
+	"TKAx_MESSAGES_RECEIVED",
+	"TKAx_GPU_ACTIVE",
+	"TKAx_IRQ_ACTIVE",
+	"TKAx_JS0_JOBS",
+	"TKAx_JS0_TASKS",
+	"TKAx_JS0_ACTIVE",
+	"",
+	"TKAx_JS0_WAIT_READ",
+	"TKAx_JS0_WAIT_ISSUE",
+	"TKAx_JS0_WAIT_DEPEND",
+	"TKAx_JS0_WAIT_FINISH",
+	"TKAx_JS1_JOBS",
+	"TKAx_JS1_TASKS",
+	"TKAx_JS1_ACTIVE",
+	"",
+	"TKAx_JS1_WAIT_READ",
+	"TKAx_JS1_WAIT_ISSUE",
+	"TKAx_JS1_WAIT_DEPEND",
+	"TKAx_JS1_WAIT_FINISH",
+	"TKAx_JS2_JOBS",
+	"TKAx_JS2_TASKS",
+	"TKAx_JS2_ACTIVE",
+	"",
+	"TKAx_JS2_WAIT_READ",
+	"TKAx_JS2_WAIT_ISSUE",
+	"TKAx_JS2_WAIT_DEPEND",
+	"TKAx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TKAx_TILER_ACTIVE",
+	"TKAx_JOBS_PROCESSED",
+	"TKAx_TRIANGLES",
+	"TKAx_LINES",
+	"TKAx_POINTS",
+	"TKAx_FRONT_FACING",
+	"TKAx_BACK_FACING",
+	"TKAx_PRIM_VISIBLE",
+	"TKAx_PRIM_CULLED",
+	"TKAx_PRIM_CLIPPED",
+	"TKAx_PRIM_SAT_CULLED",
+	"TKAx_BIN_ALLOC_INIT",
+	"TKAx_BIN_ALLOC_OVERFLOW",
+	"TKAx_BUS_READ",
+	"",
+	"TKAx_BUS_WRITE",
+	"TKAx_LOADING_DESC",
+	"TKAx_IDVS_POS_SHAD_REQ",
+	"TKAx_IDVS_POS_SHAD_WAIT",
+	"TKAx_IDVS_POS_SHAD_STALL",
+	"TKAx_IDVS_POS_FIFO_FULL",
+	"TKAx_PREFETCH_STALL",
+	"TKAx_VCACHE_HIT",
+	"TKAx_VCACHE_MISS",
+	"TKAx_VCACHE_LINE_WAIT",
+	"TKAx_VFETCH_POS_READ_WAIT",
+	"TKAx_VFETCH_VERTEX_WAIT",
+	"TKAx_VFETCH_STALL",
+	"TKAx_PRIMASSY_STALL",
+	"TKAx_BBOX_GEN_STALL",
+	"TKAx_IDVS_VBU_HIT",
+	"TKAx_IDVS_VBU_MISS",
+	"TKAx_IDVS_VBU_LINE_DEALLOCATE",
+	"TKAx_IDVS_VAR_SHAD_REQ",
+	"TKAx_IDVS_VAR_SHAD_STALL",
+	"TKAx_BINNER_STALL",
+	"TKAx_ITER_STALL",
+	"TKAx_COMPRESS_MISS",
+	"TKAx_COMPRESS_STALL",
+	"TKAx_PCACHE_HIT",
+	"TKAx_PCACHE_MISS",
+	"TKAx_PCACHE_MISS_STALL",
+	"TKAx_PCACHE_EVICT_STALL",
+	"TKAx_PMGR_PTR_WR_STALL",
+	"TKAx_PMGR_PTR_RD_STALL",
+	"TKAx_PMGR_CMD_WR_STALL",
+	"TKAx_WRBUF_ACTIVE",
+	"TKAx_WRBUF_HIT",
+	"TKAx_WRBUF_MISS",
+	"TKAx_WRBUF_NO_FREE_LINE_STALL",
+	"TKAx_WRBUF_NO_AXI_ID_STALL",
+	"TKAx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TKAx_UTLB_TRANS",
+	"TKAx_UTLB_TRANS_HIT",
+	"TKAx_UTLB_TRANS_STALL",
+	"TKAx_UTLB_TRANS_MISS_DELAY",
+	"TKAx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TKAx_FRAG_ACTIVE",
+	"TKAx_FRAG_PRIMITIVES",
+	"TKAx_FRAG_PRIM_RAST",
+	"TKAx_FRAG_FPK_ACTIVE",
+	"TKAx_FRAG_STARVING",
+	"TKAx_FRAG_WARPS",
+	"TKAx_FRAG_PARTIAL_WARPS",
+	"TKAx_FRAG_QUADS_RAST",
+	"TKAx_FRAG_QUADS_EZS_TEST",
+	"TKAx_FRAG_QUADS_EZS_UPDATE",
+	"TKAx_FRAG_QUADS_EZS_KILL",
+	"TKAx_FRAG_LZS_TEST",
+	"TKAx_FRAG_LZS_KILL",
+	"TKAx_WARP_REG_SIZE_64",
+	"TKAx_FRAG_PTILES",
+	"TKAx_FRAG_TRANS_ELIM",
+	"TKAx_QUAD_FPK_KILLER",
+	"TKAx_FULL_QUAD_WARPS",
+	"TKAx_COMPUTE_ACTIVE",
+	"TKAx_COMPUTE_TASKS",
+	"TKAx_COMPUTE_WARPS",
+	"TKAx_COMPUTE_STARVING",
+	"TKAx_EXEC_CORE_ACTIVE",
+	"TKAx_EXEC_ACTIVE",
+	"TKAx_EXEC_INSTR_COUNT",
+	"TKAx_EXEC_INSTR_DIVERGED",
+	"TKAx_EXEC_INSTR_STARVING",
+	"TKAx_ARITH_INSTR_SINGLE_FMA",
+	"TKAx_ARITH_INSTR_DOUBLE",
+	"TKAx_ARITH_INSTR_MSG",
+	"TKAx_ARITH_INSTR_MSG_ONLY",
+	"TKAx_TEX_MSGI_NUM_QUADS",
+	"TKAx_TEX_DFCH_NUM_PASSES",
+	"TKAx_TEX_DFCH_NUM_PASSES_MISS",
+	"TKAx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+	"TKAx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+	"TKAx_TEX_TFCH_NUM_LINES_FETCHED",
+	"TKAx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+	"TKAx_TEX_TFCH_NUM_OPERATIONS",
+	"TKAx_TEX_FILT_NUM_OPERATIONS",
+	"TKAx_LS_MEM_READ_FULL",
+	"TKAx_LS_MEM_READ_SHORT",
+	"TKAx_LS_MEM_WRITE_FULL",
+	"TKAx_LS_MEM_WRITE_SHORT",
+	"TKAx_LS_MEM_ATOMIC",
+	"TKAx_VARY_INSTR",
+	"TKAx_VARY_SLOT_32",
+	"TKAx_VARY_SLOT_16",
+	"TKAx_ATTR_INSTR",
+	"TKAx_ARITH_INSTR_FP_MUL",
+	"TKAx_BEATS_RD_FTC",
+	"TKAx_BEATS_RD_FTC_EXT",
+	"TKAx_BEATS_RD_LSC",
+	"TKAx_BEATS_RD_LSC_EXT",
+	"TKAx_BEATS_RD_TEX",
+	"TKAx_BEATS_RD_TEX_EXT",
+	"TKAx_BEATS_RD_OTHER",
+	"TKAx_BEATS_WR_LSC_OTHER",
+	"TKAx_BEATS_WR_TIB",
+	"TKAx_BEATS_WR_LSC_WB",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TKAx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TKAx_L2_RD_MSG_IN",
+	"TKAx_L2_RD_MSG_IN_STALL",
+	"TKAx_L2_WR_MSG_IN",
+	"TKAx_L2_WR_MSG_IN_STALL",
+	"TKAx_L2_SNP_MSG_IN",
+	"TKAx_L2_SNP_MSG_IN_STALL",
+	"TKAx_L2_RD_MSG_OUT",
+	"TKAx_L2_RD_MSG_OUT_STALL",
+	"TKAx_L2_WR_MSG_OUT",
+	"TKAx_L2_ANY_LOOKUP",
+	"TKAx_L2_READ_LOOKUP",
+	"TKAx_L2_WRITE_LOOKUP",
+	"TKAx_L2_EXT_SNOOP_LOOKUP",
+	"TKAx_L2_EXT_READ",
+	"TKAx_L2_EXT_READ_NOSNP",
+	"TKAx_L2_EXT_READ_UNIQUE",
+	"TKAx_L2_EXT_READ_BEATS",
+	"TKAx_L2_EXT_AR_STALL",
+	"TKAx_L2_EXT_AR_CNT_Q1",
+	"TKAx_L2_EXT_AR_CNT_Q2",
+	"TKAx_L2_EXT_AR_CNT_Q3",
+	"TKAx_L2_EXT_RRESP_0_127",
+	"TKAx_L2_EXT_RRESP_128_191",
+	"TKAx_L2_EXT_RRESP_192_255",
+	"TKAx_L2_EXT_RRESP_256_319",
+	"TKAx_L2_EXT_RRESP_320_383",
+	"TKAx_L2_EXT_WRITE",
+	"TKAx_L2_EXT_WRITE_NOSNP_FULL",
+	"TKAx_L2_EXT_WRITE_NOSNP_PTL",
+	"TKAx_L2_EXT_WRITE_SNP_FULL",
+	"TKAx_L2_EXT_WRITE_SNP_PTL",
+	"TKAx_L2_EXT_WRITE_BEATS",
+	"TKAx_L2_EXT_W_STALL",
+	"TKAx_L2_EXT_AW_CNT_Q1",
+	"TKAx_L2_EXT_AW_CNT_Q2",
+	"TKAx_L2_EXT_AW_CNT_Q3",
+	"TKAx_L2_EXT_SNOOP",
+	"TKAx_L2_EXT_SNOOP_STALL",
+	"TKAx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TKAx_L2_EXT_SNOOP_RESP_DATA",
+	"TKAx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TKAX_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
new file mode 100644
index 0000000000000..63eac50e0cc72
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TMIX_H_
+
+static const char * const hardware_counters_mali_tMIx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MESSAGES_SENT",
+	"TMIx_MESSAGES_RECEIVED",
+	"TMIx_GPU_ACTIVE",
+	"TMIx_IRQ_ACTIVE",
+	"TMIx_JS0_JOBS",
+	"TMIx_JS0_TASKS",
+	"TMIx_JS0_ACTIVE",
+	"",
+	"TMIx_JS0_WAIT_READ",
+	"TMIx_JS0_WAIT_ISSUE",
+	"TMIx_JS0_WAIT_DEPEND",
+	"TMIx_JS0_WAIT_FINISH",
+	"TMIx_JS1_JOBS",
+	"TMIx_JS1_TASKS",
+	"TMIx_JS1_ACTIVE",
+	"",
+	"TMIx_JS1_WAIT_READ",
+	"TMIx_JS1_WAIT_ISSUE",
+	"TMIx_JS1_WAIT_DEPEND",
+	"TMIx_JS1_WAIT_FINISH",
+	"TMIx_JS2_JOBS",
+	"TMIx_JS2_TASKS",
+	"TMIx_JS2_ACTIVE",
+	"",
+	"TMIx_JS2_WAIT_READ",
+	"TMIx_JS2_WAIT_ISSUE",
+	"TMIx_JS2_WAIT_DEPEND",
+	"TMIx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_TILER_ACTIVE",
+	"TMIx_JOBS_PROCESSED",
+	"TMIx_TRIANGLES",
+	"TMIx_LINES",
+	"TMIx_POINTS",
+	"TMIx_FRONT_FACING",
+	"TMIx_BACK_FACING",
+	"TMIx_PRIM_VISIBLE",
+	"TMIx_PRIM_CULLED",
+	"TMIx_PRIM_CLIPPED",
+	"TMIx_PRIM_SAT_CULLED",
+	"TMIx_BIN_ALLOC_INIT",
+	"TMIx_BIN_ALLOC_OVERFLOW",
+	"TMIx_BUS_READ",
+	"",
+	"TMIx_BUS_WRITE",
+	"TMIx_LOADING_DESC",
+	"TMIx_IDVS_POS_SHAD_REQ",
+	"TMIx_IDVS_POS_SHAD_WAIT",
+	"TMIx_IDVS_POS_SHAD_STALL",
+	"TMIx_IDVS_POS_FIFO_FULL",
+	"TMIx_PREFETCH_STALL",
+	"TMIx_VCACHE_HIT",
+	"TMIx_VCACHE_MISS",
+	"TMIx_VCACHE_LINE_WAIT",
+	"TMIx_VFETCH_POS_READ_WAIT",
+	"TMIx_VFETCH_VERTEX_WAIT",
+	"TMIx_VFETCH_STALL",
+	"TMIx_PRIMASSY_STALL",
+	"TMIx_BBOX_GEN_STALL",
+	"TMIx_IDVS_VBU_HIT",
+	"TMIx_IDVS_VBU_MISS",
+	"TMIx_IDVS_VBU_LINE_DEALLOCATE",
+	"TMIx_IDVS_VAR_SHAD_REQ",
+	"TMIx_IDVS_VAR_SHAD_STALL",
+	"TMIx_BINNER_STALL",
+	"TMIx_ITER_STALL",
+	"TMIx_COMPRESS_MISS",
+	"TMIx_COMPRESS_STALL",
+	"TMIx_PCACHE_HIT",
+	"TMIx_PCACHE_MISS",
+	"TMIx_PCACHE_MISS_STALL",
+	"TMIx_PCACHE_EVICT_STALL",
+	"TMIx_PMGR_PTR_WR_STALL",
+	"TMIx_PMGR_PTR_RD_STALL",
+	"TMIx_PMGR_CMD_WR_STALL",
+	"TMIx_WRBUF_ACTIVE",
+	"TMIx_WRBUF_HIT",
+	"TMIx_WRBUF_MISS",
+	"TMIx_WRBUF_NO_FREE_LINE_STALL",
+	"TMIx_WRBUF_NO_AXI_ID_STALL",
+	"TMIx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TMIx_UTLB_TRANS",
+	"TMIx_UTLB_TRANS_HIT",
+	"TMIx_UTLB_TRANS_STALL",
+	"TMIx_UTLB_TRANS_MISS_DELAY",
+	"TMIx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_FRAG_ACTIVE",
+	"TMIx_FRAG_PRIMITIVES",
+	"TMIx_FRAG_PRIM_RAST",
+	"TMIx_FRAG_FPK_ACTIVE",
+	"TMIx_FRAG_STARVING",
+	"TMIx_FRAG_WARPS",
+	"TMIx_FRAG_PARTIAL_WARPS",
+	"TMIx_FRAG_QUADS_RAST",
+	"TMIx_FRAG_QUADS_EZS_TEST",
+	"TMIx_FRAG_QUADS_EZS_UPDATE",
+	"TMIx_FRAG_QUADS_EZS_KILL",
+	"TMIx_FRAG_LZS_TEST",
+	"TMIx_FRAG_LZS_KILL",
+	"",
+	"TMIx_FRAG_PTILES",
+	"TMIx_FRAG_TRANS_ELIM",
+	"TMIx_QUAD_FPK_KILLER",
+	"",
+	"TMIx_COMPUTE_ACTIVE",
+	"TMIx_COMPUTE_TASKS",
+	"TMIx_COMPUTE_WARPS",
+	"TMIx_COMPUTE_STARVING",
+	"TMIx_EXEC_CORE_ACTIVE",
+	"TMIx_EXEC_ACTIVE",
+	"TMIx_EXEC_INSTR_COUNT",
+	"TMIx_EXEC_INSTR_DIVERGED",
+	"TMIx_EXEC_INSTR_STARVING",
+	"TMIx_ARITH_INSTR_SINGLE_FMA",
+	"TMIx_ARITH_INSTR_DOUBLE",
+	"TMIx_ARITH_INSTR_MSG",
+	"TMIx_ARITH_INSTR_MSG_ONLY",
+	"TMIx_TEX_INSTR",
+	"TMIx_TEX_INSTR_MIPMAP",
+	"TMIx_TEX_INSTR_COMPRESSED",
+	"TMIx_TEX_INSTR_3D",
+	"TMIx_TEX_INSTR_TRILINEAR",
+	"TMIx_TEX_COORD_ISSUE",
+	"TMIx_TEX_COORD_STALL",
+	"TMIx_TEX_STARVE_CACHE",
+	"TMIx_TEX_STARVE_FILTER",
+	"TMIx_LS_MEM_READ_FULL",
+	"TMIx_LS_MEM_READ_SHORT",
+	"TMIx_LS_MEM_WRITE_FULL",
+	"TMIx_LS_MEM_WRITE_SHORT",
+	"TMIx_LS_MEM_ATOMIC",
+	"TMIx_VARY_INSTR",
+	"TMIx_VARY_SLOT_32",
+	"TMIx_VARY_SLOT_16",
+	"TMIx_ATTR_INSTR",
+	"TMIx_ARITH_INSTR_FP_MUL",
+	"TMIx_BEATS_RD_FTC",
+	"TMIx_BEATS_RD_FTC_EXT",
+	"TMIx_BEATS_RD_LSC",
+	"TMIx_BEATS_RD_LSC_EXT",
+	"TMIx_BEATS_RD_TEX",
+	"TMIx_BEATS_RD_TEX_EXT",
+	"TMIx_BEATS_RD_OTHER",
+	"TMIx_BEATS_WR_LSC",
+	"TMIx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TMIx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TMIx_L2_RD_MSG_IN",
+	"TMIx_L2_RD_MSG_IN_STALL",
+	"TMIx_L2_WR_MSG_IN",
+	"TMIx_L2_WR_MSG_IN_STALL",
+	"TMIx_L2_SNP_MSG_IN",
+	"TMIx_L2_SNP_MSG_IN_STALL",
+	"TMIx_L2_RD_MSG_OUT",
+	"TMIx_L2_RD_MSG_OUT_STALL",
+	"TMIx_L2_WR_MSG_OUT",
+	"TMIx_L2_ANY_LOOKUP",
+	"TMIx_L2_READ_LOOKUP",
+	"TMIx_L2_WRITE_LOOKUP",
+	"TMIx_L2_EXT_SNOOP_LOOKUP",
+	"TMIx_L2_EXT_READ",
+	"TMIx_L2_EXT_READ_NOSNP",
+	"TMIx_L2_EXT_READ_UNIQUE",
+	"TMIx_L2_EXT_READ_BEATS",
+	"TMIx_L2_EXT_AR_STALL",
+	"TMIx_L2_EXT_AR_CNT_Q1",
+	"TMIx_L2_EXT_AR_CNT_Q2",
+	"TMIx_L2_EXT_AR_CNT_Q3",
+	"TMIx_L2_EXT_RRESP_0_127",
+	"TMIx_L2_EXT_RRESP_128_191",
+	"TMIx_L2_EXT_RRESP_192_255",
+	"TMIx_L2_EXT_RRESP_256_319",
+	"TMIx_L2_EXT_RRESP_320_383",
+	"TMIx_L2_EXT_WRITE",
+	"TMIx_L2_EXT_WRITE_NOSNP_FULL",
+	"TMIx_L2_EXT_WRITE_NOSNP_PTL",
+	"TMIx_L2_EXT_WRITE_SNP_FULL",
+	"TMIx_L2_EXT_WRITE_SNP_PTL",
+	"TMIx_L2_EXT_WRITE_BEATS",
+	"TMIx_L2_EXT_W_STALL",
+	"TMIx_L2_EXT_AW_CNT_Q1",
+	"TMIx_L2_EXT_AW_CNT_Q2",
+	"TMIx_L2_EXT_AW_CNT_Q3",
+	"TMIx_L2_EXT_SNOOP",
+	"TMIx_L2_EXT_SNOOP_STALL",
+	"TMIx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TMIx_L2_EXT_SNOOP_RESP_DATA",
+	"TMIx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tnox.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tnox.h
new file mode 100644
index 0000000000000..932663cfb6a95
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tnox.h
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TNOX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TNOX_H_
+
+static const char * const hardware_counters_mali_tNOx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TNOx_MESSAGES_SENT",
+	"TNOx_MESSAGES_RECEIVED",
+	"TNOx_GPU_ACTIVE",
+	"TNOx_IRQ_ACTIVE",
+	"TNOx_JS0_JOBS",
+	"TNOx_JS0_TASKS",
+	"TNOx_JS0_ACTIVE",
+	"",
+	"TNOx_JS0_WAIT_READ",
+	"TNOx_JS0_WAIT_ISSUE",
+	"TNOx_JS0_WAIT_DEPEND",
+	"TNOx_JS0_WAIT_FINISH",
+	"TNOx_JS1_JOBS",
+	"TNOx_JS1_TASKS",
+	"TNOx_JS1_ACTIVE",
+	"",
+	"TNOx_JS1_WAIT_READ",
+	"TNOx_JS1_WAIT_ISSUE",
+	"TNOx_JS1_WAIT_DEPEND",
+	"TNOx_JS1_WAIT_FINISH",
+	"TNOx_JS2_JOBS",
+	"TNOx_JS2_TASKS",
+	"TNOx_JS2_ACTIVE",
+	"",
+	"TNOx_JS2_WAIT_READ",
+	"TNOx_JS2_WAIT_ISSUE",
+	"TNOx_JS2_WAIT_DEPEND",
+	"TNOx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TNOx_TILER_ACTIVE",
+	"TNOx_JOBS_PROCESSED",
+	"TNOx_TRIANGLES",
+	"TNOx_LINES",
+	"TNOx_POINTS",
+	"TNOx_FRONT_FACING",
+	"TNOx_BACK_FACING",
+	"TNOx_PRIM_VISIBLE",
+	"TNOx_PRIM_CULLED",
+	"TNOx_PRIM_CLIPPED",
+	"TNOx_PRIM_SAT_CULLED",
+	"TNOx_BIN_ALLOC_INIT",
+	"TNOx_BIN_ALLOC_OVERFLOW",
+	"TNOx_BUS_READ",
+	"",
+	"TNOx_BUS_WRITE",
+	"TNOx_LOADING_DESC",
+	"TNOx_IDVS_POS_SHAD_REQ",
+	"TNOx_IDVS_POS_SHAD_WAIT",
+	"TNOx_IDVS_POS_SHAD_STALL",
+	"TNOx_IDVS_POS_FIFO_FULL",
+	"TNOx_PREFETCH_STALL",
+	"TNOx_VCACHE_HIT",
+	"TNOx_VCACHE_MISS",
+	"TNOx_VCACHE_LINE_WAIT",
+	"TNOx_VFETCH_POS_READ_WAIT",
+	"TNOx_VFETCH_VERTEX_WAIT",
+	"TNOx_VFETCH_STALL",
+	"TNOx_PRIMASSY_STALL",
+	"TNOx_BBOX_GEN_STALL",
+	"TNOx_IDVS_VBU_HIT",
+	"TNOx_IDVS_VBU_MISS",
+	"TNOx_IDVS_VBU_LINE_DEALLOCATE",
+	"TNOx_IDVS_VAR_SHAD_REQ",
+	"TNOx_IDVS_VAR_SHAD_STALL",
+	"TNOx_BINNER_STALL",
+	"TNOx_ITER_STALL",
+	"TNOx_COMPRESS_MISS",
+	"TNOx_COMPRESS_STALL",
+	"TNOx_PCACHE_HIT",
+	"TNOx_PCACHE_MISS",
+	"TNOx_PCACHE_MISS_STALL",
+	"TNOx_PCACHE_EVICT_STALL",
+	"TNOx_PMGR_PTR_WR_STALL",
+	"TNOx_PMGR_PTR_RD_STALL",
+	"TNOx_PMGR_CMD_WR_STALL",
+	"TNOx_WRBUF_ACTIVE",
+	"TNOx_WRBUF_HIT",
+	"TNOx_WRBUF_MISS",
+	"TNOx_WRBUF_NO_FREE_LINE_STALL",
+	"TNOx_WRBUF_NO_AXI_ID_STALL",
+	"TNOx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TNOx_UTLB_TRANS",
+	"TNOx_UTLB_TRANS_HIT",
+	"TNOx_UTLB_TRANS_STALL",
+	"TNOx_UTLB_TRANS_MISS_DELAY",
+	"TNOx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TNOx_FRAG_ACTIVE",
+	"TNOx_FRAG_PRIMITIVES",
+	"TNOx_FRAG_PRIM_RAST",
+	"TNOx_FRAG_FPK_ACTIVE",
+	"TNOx_FRAG_STARVING",
+	"TNOx_FRAG_WARPS",
+	"TNOx_FRAG_PARTIAL_WARPS",
+	"TNOx_FRAG_QUADS_RAST",
+	"TNOx_FRAG_QUADS_EZS_TEST",
+	"TNOx_FRAG_QUADS_EZS_UPDATE",
+	"TNOx_FRAG_QUADS_EZS_KILL",
+	"TNOx_FRAG_LZS_TEST",
+	"TNOx_FRAG_LZS_KILL",
+	"TNOx_WARP_REG_SIZE_64",
+	"TNOx_FRAG_PTILES",
+	"TNOx_FRAG_TRANS_ELIM",
+	"TNOx_QUAD_FPK_KILLER",
+	"TNOx_FULL_QUAD_WARPS",
+	"TNOx_COMPUTE_ACTIVE",
+	"TNOx_COMPUTE_TASKS",
+	"TNOx_COMPUTE_WARPS",
+	"TNOx_COMPUTE_STARVING",
+	"TNOx_EXEC_CORE_ACTIVE",
+	"TNOx_EXEC_ACTIVE",
+	"TNOx_EXEC_INSTR_COUNT",
+	"TNOx_EXEC_INSTR_DIVERGED",
+	"TNOx_EXEC_INSTR_STARVING",
+	"TNOx_ARITH_INSTR_SINGLE_FMA",
+	"TNOx_ARITH_INSTR_DOUBLE",
+	"TNOx_ARITH_INSTR_MSG",
+	"TNOx_ARITH_INSTR_MSG_ONLY",
+	"TNOx_TEX_MSGI_NUM_QUADS",
+	"TNOx_TEX_DFCH_NUM_PASSES",
+	"TNOx_TEX_DFCH_NUM_PASSES_MISS",
+	"TNOx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+	"TNOx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+	"TNOx_TEX_TFCH_NUM_LINES_FETCHED",
+	"TNOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+	"TNOx_TEX_TFCH_NUM_OPERATIONS",
+	"TNOx_TEX_FILT_NUM_OPERATIONS",
+	"TNOx_LS_MEM_READ_FULL",
+	"TNOx_LS_MEM_READ_SHORT",
+	"TNOx_LS_MEM_WRITE_FULL",
+	"TNOx_LS_MEM_WRITE_SHORT",
+	"TNOx_LS_MEM_ATOMIC",
+	"TNOx_VARY_INSTR",
+	"TNOx_VARY_SLOT_32",
+	"TNOx_VARY_SLOT_16",
+	"TNOx_ATTR_INSTR",
+	"TNOx_ARITH_INSTR_FP_MUL",
+	"TNOx_BEATS_RD_FTC",
+	"TNOx_BEATS_RD_FTC_EXT",
+	"TNOx_BEATS_RD_LSC",
+	"TNOx_BEATS_RD_LSC_EXT",
+	"TNOx_BEATS_RD_TEX",
+	"TNOx_BEATS_RD_TEX_EXT",
+	"TNOx_BEATS_RD_OTHER",
+	"TNOx_BEATS_WR_LSC_OTHER",
+	"TNOx_BEATS_WR_TIB",
+	"TNOx_BEATS_WR_LSC_WB",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TNOx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TNOx_L2_RD_MSG_IN",
+	"TNOx_L2_RD_MSG_IN_STALL",
+	"TNOx_L2_WR_MSG_IN",
+	"TNOx_L2_WR_MSG_IN_STALL",
+	"TNOx_L2_SNP_MSG_IN",
+	"TNOx_L2_SNP_MSG_IN_STALL",
+	"TNOx_L2_RD_MSG_OUT",
+	"TNOx_L2_RD_MSG_OUT_STALL",
+	"TNOx_L2_WR_MSG_OUT",
+	"TNOx_L2_ANY_LOOKUP",
+	"TNOx_L2_READ_LOOKUP",
+	"TNOx_L2_WRITE_LOOKUP",
+	"TNOx_L2_EXT_SNOOP_LOOKUP",
+	"TNOx_L2_EXT_READ",
+	"TNOx_L2_EXT_READ_NOSNP",
+	"TNOx_L2_EXT_READ_UNIQUE",
+	"TNOx_L2_EXT_READ_BEATS",
+	"TNOx_L2_EXT_AR_STALL",
+	"TNOx_L2_EXT_AR_CNT_Q1",
+	"TNOx_L2_EXT_AR_CNT_Q2",
+	"TNOx_L2_EXT_AR_CNT_Q3",
+	"TNOx_L2_EXT_RRESP_0_127",
+	"TNOx_L2_EXT_RRESP_128_191",
+	"TNOx_L2_EXT_RRESP_192_255",
+	"TNOx_L2_EXT_RRESP_256_319",
+	"TNOx_L2_EXT_RRESP_320_383",
+	"TNOx_L2_EXT_WRITE",
+	"TNOx_L2_EXT_WRITE_NOSNP_FULL",
+	"TNOx_L2_EXT_WRITE_NOSNP_PTL",
+	"TNOx_L2_EXT_WRITE_SNP_FULL",
+	"TNOx_L2_EXT_WRITE_SNP_PTL",
+	"TNOx_L2_EXT_WRITE_BEATS",
+	"TNOx_L2_EXT_W_STALL",
+	"TNOx_L2_EXT_AW_CNT_Q1",
+	"TNOx_L2_EXT_AW_CNT_Q2",
+	"TNOx_L2_EXT_AW_CNT_Q3",
+	"TNOx_L2_EXT_SNOOP",
+	"TNOx_L2_EXT_SNOOP_STALL",
+	"TNOx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TNOx_L2_EXT_SNOOP_RESP_DATA",
+	"TNOx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TNOX_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h
new file mode 100644
index 0000000000000..b8dde32bc529e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tsix.h
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TSIX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TSIX_H_
+
+static const char * const hardware_counters_mali_tSIx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_MESSAGES_SENT",
+	"TSIx_MESSAGES_RECEIVED",
+	"TSIx_GPU_ACTIVE",
+	"TSIx_IRQ_ACTIVE",
+	"TSIx_JS0_JOBS",
+	"TSIx_JS0_TASKS",
+	"TSIx_JS0_ACTIVE",
+	"",
+	"TSIx_JS0_WAIT_READ",
+	"TSIx_JS0_WAIT_ISSUE",
+	"TSIx_JS0_WAIT_DEPEND",
+	"TSIx_JS0_WAIT_FINISH",
+	"TSIx_JS1_JOBS",
+	"TSIx_JS1_TASKS",
+	"TSIx_JS1_ACTIVE",
+	"",
+	"TSIx_JS1_WAIT_READ",
+	"TSIx_JS1_WAIT_ISSUE",
+	"TSIx_JS1_WAIT_DEPEND",
+	"TSIx_JS1_WAIT_FINISH",
+	"TSIx_JS2_JOBS",
+	"TSIx_JS2_TASKS",
+	"TSIx_JS2_ACTIVE",
+	"",
+	"TSIx_JS2_WAIT_READ",
+	"TSIx_JS2_WAIT_ISSUE",
+	"TSIx_JS2_WAIT_DEPEND",
+	"TSIx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_TILER_ACTIVE",
+	"TSIx_JOBS_PROCESSED",
+	"TSIx_TRIANGLES",
+	"TSIx_LINES",
+	"TSIx_POINTS",
+	"TSIx_FRONT_FACING",
+	"TSIx_BACK_FACING",
+	"TSIx_PRIM_VISIBLE",
+	"TSIx_PRIM_CULLED",
+	"TSIx_PRIM_CLIPPED",
+	"TSIx_PRIM_SAT_CULLED",
+	"TSIx_BIN_ALLOC_INIT",
+	"TSIx_BIN_ALLOC_OVERFLOW",
+	"TSIx_BUS_READ",
+	"",
+	"TSIx_BUS_WRITE",
+	"TSIx_LOADING_DESC",
+	"TSIx_IDVS_POS_SHAD_REQ",
+	"TSIx_IDVS_POS_SHAD_WAIT",
+	"TSIx_IDVS_POS_SHAD_STALL",
+	"TSIx_IDVS_POS_FIFO_FULL",
+	"TSIx_PREFETCH_STALL",
+	"TSIx_VCACHE_HIT",
+	"TSIx_VCACHE_MISS",
+	"TSIx_VCACHE_LINE_WAIT",
+	"TSIx_VFETCH_POS_READ_WAIT",
+	"TSIx_VFETCH_VERTEX_WAIT",
+	"TSIx_VFETCH_STALL",
+	"TSIx_PRIMASSY_STALL",
+	"TSIx_BBOX_GEN_STALL",
+	"TSIx_IDVS_VBU_HIT",
+	"TSIx_IDVS_VBU_MISS",
+	"TSIx_IDVS_VBU_LINE_DEALLOCATE",
+	"TSIx_IDVS_VAR_SHAD_REQ",
+	"TSIx_IDVS_VAR_SHAD_STALL",
+	"TSIx_BINNER_STALL",
+	"TSIx_ITER_STALL",
+	"TSIx_COMPRESS_MISS",
+	"TSIx_COMPRESS_STALL",
+	"TSIx_PCACHE_HIT",
+	"TSIx_PCACHE_MISS",
+	"TSIx_PCACHE_MISS_STALL",
+	"TSIx_PCACHE_EVICT_STALL",
+	"TSIx_PMGR_PTR_WR_STALL",
+	"TSIx_PMGR_PTR_RD_STALL",
+	"TSIx_PMGR_CMD_WR_STALL",
+	"TSIx_WRBUF_ACTIVE",
+	"TSIx_WRBUF_HIT",
+	"TSIx_WRBUF_MISS",
+	"TSIx_WRBUF_NO_FREE_LINE_STALL",
+	"TSIx_WRBUF_NO_AXI_ID_STALL",
+	"TSIx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TSIx_UTLB_TRANS",
+	"TSIx_UTLB_TRANS_HIT",
+	"TSIx_UTLB_TRANS_STALL",
+	"TSIx_UTLB_TRANS_MISS_DELAY",
+	"TSIx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_FRAG_ACTIVE",
+	"TSIx_FRAG_PRIMITIVES",
+	"TSIx_FRAG_PRIM_RAST",
+	"TSIx_FRAG_FPK_ACTIVE",
+	"TSIx_FRAG_STARVING",
+	"TSIx_FRAG_WARPS",
+	"TSIx_FRAG_PARTIAL_WARPS",
+	"TSIx_FRAG_QUADS_RAST",
+	"TSIx_FRAG_QUADS_EZS_TEST",
+	"TSIx_FRAG_QUADS_EZS_UPDATE",
+	"TSIx_FRAG_QUADS_EZS_KILL",
+	"TSIx_FRAG_LZS_TEST",
+	"TSIx_FRAG_LZS_KILL",
+	"",
+	"TSIx_FRAG_PTILES",
+	"TSIx_FRAG_TRANS_ELIM",
+	"TSIx_QUAD_FPK_KILLER",
+	"",
+	"TSIx_COMPUTE_ACTIVE",
+	"TSIx_COMPUTE_TASKS",
+	"TSIx_COMPUTE_WARPS",
+	"TSIx_COMPUTE_STARVING",
+	"TSIx_EXEC_CORE_ACTIVE",
+	"TSIx_EXEC_ACTIVE",
+	"TSIx_EXEC_INSTR_COUNT",
+	"TSIx_EXEC_INSTR_DIVERGED",
+	"TSIx_EXEC_INSTR_STARVING",
+	"TSIx_ARITH_INSTR_SINGLE_FMA",
+	"TSIx_ARITH_INSTR_DOUBLE",
+	"TSIx_ARITH_INSTR_MSG",
+	"TSIx_ARITH_INSTR_MSG_ONLY",
+	"TSIx_TEX_MSGI_NUM_QUADS",
+	"TSIx_TEX_DFCH_NUM_PASSES",
+	"TSIx_TEX_DFCH_NUM_PASSES_MISS",
+	"TSIx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+	"TSIx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+	"TSIx_TEX_TFCH_NUM_LINES_FETCHED",
+	"TSIx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+	"TSIx_TEX_TFCH_NUM_OPERATIONS",
+	"TSIx_TEX_FILT_NUM_OPERATIONS",
+	"TSIx_LS_MEM_READ_FULL",
+	"TSIx_LS_MEM_READ_SHORT",
+	"TSIx_LS_MEM_WRITE_FULL",
+	"TSIx_LS_MEM_WRITE_SHORT",
+	"TSIx_LS_MEM_ATOMIC",
+	"TSIx_VARY_INSTR",
+	"TSIx_VARY_SLOT_32",
+	"TSIx_VARY_SLOT_16",
+	"TSIx_ATTR_INSTR",
+	"TSIx_ARITH_INSTR_FP_MUL",
+	"TSIx_BEATS_RD_FTC",
+	"TSIx_BEATS_RD_FTC_EXT",
+	"TSIx_BEATS_RD_LSC",
+	"TSIx_BEATS_RD_LSC_EXT",
+	"TSIx_BEATS_RD_TEX",
+	"TSIx_BEATS_RD_TEX_EXT",
+	"TSIx_BEATS_RD_OTHER",
+	"TSIx_BEATS_WR_LSC_OTHER",
+	"TSIx_BEATS_WR_TIB",
+	"TSIx_BEATS_WR_LSC_WB",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TSIx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TSIx_L2_RD_MSG_IN",
+	"TSIx_L2_RD_MSG_IN_STALL",
+	"TSIx_L2_WR_MSG_IN",
+	"TSIx_L2_WR_MSG_IN_STALL",
+	"TSIx_L2_SNP_MSG_IN",
+	"TSIx_L2_SNP_MSG_IN_STALL",
+	"TSIx_L2_RD_MSG_OUT",
+	"TSIx_L2_RD_MSG_OUT_STALL",
+	"TSIx_L2_WR_MSG_OUT",
+	"TSIx_L2_ANY_LOOKUP",
+	"TSIx_L2_READ_LOOKUP",
+	"TSIx_L2_WRITE_LOOKUP",
+	"TSIx_L2_EXT_SNOOP_LOOKUP",
+	"TSIx_L2_EXT_READ",
+	"TSIx_L2_EXT_READ_NOSNP",
+	"TSIx_L2_EXT_READ_UNIQUE",
+	"TSIx_L2_EXT_READ_BEATS",
+	"TSIx_L2_EXT_AR_STALL",
+	"TSIx_L2_EXT_AR_CNT_Q1",
+	"TSIx_L2_EXT_AR_CNT_Q2",
+	"TSIx_L2_EXT_AR_CNT_Q3",
+	"TSIx_L2_EXT_RRESP_0_127",
+	"TSIx_L2_EXT_RRESP_128_191",
+	"TSIx_L2_EXT_RRESP_192_255",
+	"TSIx_L2_EXT_RRESP_256_319",
+	"TSIx_L2_EXT_RRESP_320_383",
+	"TSIx_L2_EXT_WRITE",
+	"TSIx_L2_EXT_WRITE_NOSNP_FULL",
+	"TSIx_L2_EXT_WRITE_NOSNP_PTL",
+	"TSIx_L2_EXT_WRITE_SNP_FULL",
+	"TSIx_L2_EXT_WRITE_SNP_PTL",
+	"TSIx_L2_EXT_WRITE_BEATS",
+	"TSIx_L2_EXT_W_STALL",
+	"TSIx_L2_EXT_AW_CNT_Q1",
+	"TSIx_L2_EXT_AW_CNT_Q2",
+	"TSIx_L2_EXT_AW_CNT_Q3",
+	"TSIx_L2_EXT_SNOOP",
+	"TSIx_L2_EXT_SNOOP_STALL",
+	"TSIx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TSIx_L2_EXT_SNOOP_RESP_DATA",
+	"TSIx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TSIX_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_ttrx.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_ttrx.h
new file mode 100644
index 0000000000000..c1e315b0f534c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_ttrx.h
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_TTRX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_TTRX_H_
+
+static const char * const hardware_counters_mali_tTRx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"TTRx_MESSAGES_SENT",
+	"TTRx_MESSAGES_RECEIVED",
+	"TTRx_GPU_ACTIVE",
+	"TTRx_IRQ_ACTIVE",
+	"TTRx_JS0_JOBS",
+	"TTRx_JS0_TASKS",
+	"TTRx_JS0_ACTIVE",
+	"",
+	"TTRx_JS0_WAIT_READ",
+	"TTRx_JS0_WAIT_ISSUE",
+	"TTRx_JS0_WAIT_DEPEND",
+	"TTRx_JS0_WAIT_FINISH",
+	"TTRx_JS1_JOBS",
+	"TTRx_JS1_TASKS",
+	"TTRx_JS1_ACTIVE",
+	"",
+	"TTRx_JS1_WAIT_READ",
+	"TTRx_JS1_WAIT_ISSUE",
+	"TTRx_JS1_WAIT_DEPEND",
+	"TTRx_JS1_WAIT_FINISH",
+	"TTRx_JS2_JOBS",
+	"TTRx_JS2_TASKS",
+	"TTRx_JS2_ACTIVE",
+	"",
+	"TTRx_JS2_WAIT_READ",
+	"TTRx_JS2_WAIT_ISSUE",
+	"TTRx_JS2_WAIT_DEPEND",
+	"TTRx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"TTRx_TILER_ACTIVE",
+	"TTRx_JOBS_PROCESSED",
+	"TTRx_TRIANGLES",
+	"TTRx_LINES",
+	"TTRx_POINTS",
+	"TTRx_FRONT_FACING",
+	"TTRx_BACK_FACING",
+	"TTRx_PRIM_VISIBLE",
+	"TTRx_PRIM_CULLED",
+	"TTRx_PRIM_CLIPPED",
+	"TTRx_PRIM_SAT_CULLED",
+	"TTRx_BIN_ALLOC_INIT",
+	"TTRx_BIN_ALLOC_OVERFLOW",
+	"TTRx_BUS_READ",
+	"",
+	"TTRx_BUS_WRITE",
+	"TTRx_LOADING_DESC",
+	"TTRx_IDVS_POS_SHAD_REQ",
+	"TTRx_IDVS_POS_SHAD_WAIT",
+	"TTRx_IDVS_POS_SHAD_STALL",
+	"TTRx_IDVS_POS_FIFO_FULL",
+	"TTRx_PREFETCH_STALL",
+	"TTRx_VCACHE_HIT",
+	"TTRx_VCACHE_MISS",
+	"TTRx_VCACHE_LINE_WAIT",
+	"TTRx_VFETCH_POS_READ_WAIT",
+	"TTRx_VFETCH_VERTEX_WAIT",
+	"TTRx_VFETCH_STALL",
+	"TTRx_PRIMASSY_STALL",
+	"TTRx_BBOX_GEN_STALL",
+	"TTRx_IDVS_VBU_HIT",
+	"TTRx_IDVS_VBU_MISS",
+	"TTRx_IDVS_VBU_LINE_DEALLOCATE",
+	"TTRx_IDVS_VAR_SHAD_REQ",
+	"TTRx_IDVS_VAR_SHAD_STALL",
+	"TTRx_BINNER_STALL",
+	"TTRx_ITER_STALL",
+	"TTRx_COMPRESS_MISS",
+	"TTRx_COMPRESS_STALL",
+	"TTRx_PCACHE_HIT",
+	"TTRx_PCACHE_MISS",
+	"TTRx_PCACHE_MISS_STALL",
+	"TTRx_PCACHE_EVICT_STALL",
+	"TTRx_PMGR_PTR_WR_STALL",
+	"TTRx_PMGR_PTR_RD_STALL",
+	"TTRx_PMGR_CMD_WR_STALL",
+	"TTRx_WRBUF_ACTIVE",
+	"TTRx_WRBUF_HIT",
+	"TTRx_WRBUF_MISS",
+	"TTRx_WRBUF_NO_FREE_LINE_STALL",
+	"TTRx_WRBUF_NO_AXI_ID_STALL",
+	"TTRx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"TTRx_UTLB_TRANS",
+	"TTRx_UTLB_TRANS_HIT",
+	"TTRx_UTLB_TRANS_STALL",
+	"TTRx_UTLB_TRANS_MISS_DELAY",
+	"TTRx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"TTRx_FRAG_ACTIVE",
+	"TTRx_FRAG_PRIMITIVES",
+	"TTRx_FRAG_PRIM_RAST",
+	"TTRx_FRAG_FPK_ACTIVE",
+	"TTRx_FRAG_STARVING",
+	"TTRx_FRAG_WARPS",
+	"TTRx_FRAG_PARTIAL_WARPS",
+	"TTRx_FRAG_QUADS_RAST",
+	"TTRx_FRAG_QUADS_EZS_TEST",
+	"TTRx_FRAG_QUADS_EZS_UPDATE",
+	"TTRx_FRAG_QUADS_EZS_KILL",
+	"TTRx_FRAG_LZS_TEST",
+	"TTRx_FRAG_LZS_KILL",
+	"TTRx_WARP_REG_SIZE_64",
+	"TTRx_FRAG_PTILES",
+	"TTRx_FRAG_TRANS_ELIM",
+	"TTRx_QUAD_FPK_KILLER",
+	"TTRx_FULL_QUAD_WARPS",
+	"TTRx_COMPUTE_ACTIVE",
+	"TTRx_COMPUTE_TASKS",
+	"TTRx_COMPUTE_WARPS",
+	"TTRx_COMPUTE_STARVING",
+	"TTRx_EXEC_CORE_ACTIVE",
+	"TTRx_EXEC_INSTR_FMA",
+	"TTRx_EXEC_INSTR_CVT",
+	"TTRx_EXEC_INSTR_SFU",
+	"TTRx_EXEC_INSTR_MSG",
+	"TTRx_EXEC_INSTR_DIVERGED",
+	"TTRx_EXEC_ICACHE_MISS",
+	"TTRx_EXEC_STARVE_ARITH",
+	"TTRx_CALL_BLEND_SHADER",
+	"TTRx_TEX_MSGI_NUM_QUADS",
+	"TTRx_TEX_DFCH_NUM_PASSES",
+	"TTRx_TEX_DFCH_NUM_PASSES_MISS",
+	"TTRx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+	"TTRx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+	"TTRx_TEX_TFCH_NUM_LINES_FETCHED",
+	"TTRx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+	"TTRx_TEX_TFCH_NUM_OPERATIONS",
+	"TTRx_TEX_FILT_NUM_OPERATIONS",
+	"TTRx_LS_MEM_READ_FULL",
+	"TTRx_LS_MEM_READ_SHORT",
+	"TTRx_LS_MEM_WRITE_FULL",
+	"TTRx_LS_MEM_WRITE_SHORT",
+	"TTRx_LS_MEM_ATOMIC",
+	"TTRx_VARY_INSTR",
+	"TTRx_VARY_SLOT_32",
+	"TTRx_VARY_SLOT_16",
+	"TTRx_ATTR_INSTR",
+	"TTRx_ARITH_INSTR_FP_MUL",
+	"TTRx_BEATS_RD_FTC",
+	"TTRx_BEATS_RD_FTC_EXT",
+	"TTRx_BEATS_RD_LSC",
+	"TTRx_BEATS_RD_LSC_EXT",
+	"TTRx_BEATS_RD_TEX",
+	"TTRx_BEATS_RD_TEX_EXT",
+	"TTRx_BEATS_RD_OTHER",
+	"TTRx_BEATS_WR_LSC_OTHER",
+	"TTRx_BEATS_WR_TIB",
+	"TTRx_BEATS_WR_LSC_WB",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"TTRx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"TTRx_L2_RD_MSG_IN",
+	"TTRx_L2_RD_MSG_IN_STALL",
+	"TTRx_L2_WR_MSG_IN",
+	"TTRx_L2_WR_MSG_IN_STALL",
+	"TTRx_L2_SNP_MSG_IN",
+	"TTRx_L2_SNP_MSG_IN_STALL",
+	"TTRx_L2_RD_MSG_OUT",
+	"TTRx_L2_RD_MSG_OUT_STALL",
+	"TTRx_L2_WR_MSG_OUT",
+	"TTRx_L2_ANY_LOOKUP",
+	"TTRx_L2_READ_LOOKUP",
+	"TTRx_L2_WRITE_LOOKUP",
+	"TTRx_L2_EXT_SNOOP_LOOKUP",
+	"TTRx_L2_EXT_READ",
+	"TTRx_L2_EXT_READ_NOSNP",
+	"TTRx_L2_EXT_READ_UNIQUE",
+	"TTRx_L2_EXT_READ_BEATS",
+	"TTRx_L2_EXT_AR_STALL",
+	"TTRx_L2_EXT_AR_CNT_Q1",
+	"TTRx_L2_EXT_AR_CNT_Q2",
+	"TTRx_L2_EXT_AR_CNT_Q3",
+	"TTRx_L2_EXT_RRESP_0_127",
+	"TTRx_L2_EXT_RRESP_128_191",
+	"TTRx_L2_EXT_RRESP_192_255",
+	"TTRx_L2_EXT_RRESP_256_319",
+	"TTRx_L2_EXT_RRESP_320_383",
+	"TTRx_L2_EXT_WRITE",
+	"TTRx_L2_EXT_WRITE_NOSNP_FULL",
+	"TTRx_L2_EXT_WRITE_NOSNP_PTL",
+	"TTRx_L2_EXT_WRITE_SNP_FULL",
+	"TTRx_L2_EXT_WRITE_SNP_PTL",
+	"TTRx_L2_EXT_WRITE_BEATS",
+	"TTRx_L2_EXT_W_STALL",
+	"TTRx_L2_EXT_AW_CNT_Q1",
+	"TTRx_L2_EXT_AW_CNT_Q2",
+	"TTRx_L2_EXT_AW_CNT_Q3",
+	"TTRx_L2_EXT_SNOOP",
+	"TTRx_L2_EXT_SNOOP_STALL",
+	"TTRx_L2_EXT_SNOOP_RESP_CLEAN",
+	"TTRx_L2_EXT_SNOOP_RESP_DATA",
+	"TTRx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_TTRX_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
new file mode 100644
index 0000000000000..218e63a61c6ca
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
@@ -0,0 +1,128 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#ifndef _KBASE_GPU_ID_H_
+#define _KBASE_GPU_ID_H_
+
+/* GPU_ID register */
+#define GPU_ID_VERSION_STATUS_SHIFT       0
+#define GPU_ID_VERSION_MINOR_SHIFT        4
+#define GPU_ID_VERSION_MAJOR_SHIFT        12
+#define GPU_ID_VERSION_PRODUCT_ID_SHIFT   16
+#define GPU_ID_VERSION_STATUS             (0xFu  << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR              (0xFFu << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR              (0xFu  << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID  (0xFFFFu << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+
+/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
+#define GPU_ID_PI_T60X                    0x6956u
+#define GPU_ID_PI_T62X                    0x0620u
+#define GPU_ID_PI_T76X                    0x0750u
+#define GPU_ID_PI_T72X                    0x0720u
+#define GPU_ID_PI_TFRX                    0x0880u
+#define GPU_ID_PI_T86X                    0x0860u
+#define GPU_ID_PI_T82X                    0x0820u
+#define GPU_ID_PI_T83X                    0x0830u
+
+/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */
+#define GPU_ID_PI_NEW_FORMAT_START        0x1000
+#define GPU_ID_IS_NEW_FORMAT(product_id)  ((product_id) != GPU_ID_PI_T60X && \
+						(product_id) >= \
+						GPU_ID_PI_NEW_FORMAT_START)
+
+#define GPU_ID2_VERSION_STATUS_SHIFT      0
+#define GPU_ID2_VERSION_MINOR_SHIFT       4
+#define GPU_ID2_VERSION_MAJOR_SHIFT       12
+#define GPU_ID2_PRODUCT_MAJOR_SHIFT       16
+#define GPU_ID2_ARCH_REV_SHIFT            20
+#define GPU_ID2_ARCH_MINOR_SHIFT          24
+#define GPU_ID2_ARCH_MAJOR_SHIFT          28
+#define GPU_ID2_VERSION_STATUS            (0xFu << GPU_ID2_VERSION_STATUS_SHIFT)
+#define GPU_ID2_VERSION_MINOR             (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR             (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR             (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT)
+#define GPU_ID2_ARCH_REV                  (0xFu << GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR                (0xFu << GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR                (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MODEL  (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
+#define GPU_ID2_VERSION        (GPU_ID2_VERSION_MAJOR | \
+								GPU_ID2_VERSION_MINOR | \
+								GPU_ID2_VERSION_STATUS)
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+   a product ignoring its version. */
+#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
+		((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		 (((u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
+		 (((u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
+		 (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Helper macro to create a partial GPU_ID (new format) that specifies the
+   revision (major, minor, status) of a product */
+#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
+		((((u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
+		 (((u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
+		 (((u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+
+/* Helper macro to create a complete GPU_ID (new format) */
+#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
+	version_major, version_minor, version_status) \
+		(GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \
+			product_major) | \
+		 GPU_ID2_VERSION_MAKE(version_major, version_minor,     \
+			version_status))
+
+/* Helper macro to create a partial GPU_ID (new format) that identifies
+   a particular GPU model by its arch_major and product_major. */
+#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
+		((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+		(((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Strip off the non-relevant bits from a product_id value and make it suitable
+   for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
+   model. */
+#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
+		((((u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
+		    GPU_ID2_PRODUCT_MODEL)
+
+#define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6, 0)
+#define GPU_ID2_PRODUCT_THEX              GPU_ID2_MODEL_MAKE(6, 1)
+#define GPU_ID2_PRODUCT_TSIX              GPU_ID2_MODEL_MAKE(7, 0)
+#define GPU_ID2_PRODUCT_TDVX              GPU_ID2_MODEL_MAKE(7, 3)
+#define GPU_ID2_PRODUCT_TNOX              GPU_ID2_MODEL_MAKE(7, 1)
+#define GPU_ID2_PRODUCT_TGOX              GPU_ID2_MODEL_MAKE(7, 2)
+#define GPU_ID2_PRODUCT_TKAX              GPU_ID2_MODEL_MAKE(8, 0)
+#define GPU_ID2_PRODUCT_TTRX              GPU_ID2_MODEL_MAKE(9, 0)
+#define GPU_ID2_PRODUCT_TBOX              GPU_ID2_MODEL_MAKE(8, 2)
+
+/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
+#define GPU_ID_S_15DEV0                   0x1
+#define GPU_ID_S_EAC                      0x2
+
+/* Helper macro to create a GPU_ID assuming valid values for id, major,
+   minor, status */
+#define GPU_ID_MAKE(id, major, minor, status) \
+		((((u32)id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+		(((u32)major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
+		(((u32)minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
+		(((u32)status) << GPU_ID_VERSION_STATUS_SHIFT))
+
+#endif /* _KBASE_GPU_ID_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
new file mode 100644
index 0000000000000..514b065d48674
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
@@ -0,0 +1,102 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+/** Show callback for the @c gpu_memory debugfs file.
+ *
+ * This function is called to get the contents of the @c gpu_memory debugfs
+ * file. This is a report of current gpu memory usage.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if successfully prints data in debugfs entry file
+ *         -1 if it encountered an error
+ */
+
+static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
+{
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+
+	kbdev_list = kbase_dev_list_get();
+	list_for_each(entry, kbdev_list) {
+		struct kbase_device *kbdev = NULL;
+		struct kbasep_kctx_list_element *element;
+
+		kbdev = list_entry(entry, struct kbase_device, entry);
+		/* output the total memory usage and cap for this device */
+		seq_printf(sfile, "%-16s  %10u\n",
+				kbdev->devname,
+				atomic_read(&(kbdev->memdev.used_pages)));
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_for_each_entry(element, &kbdev->kctx_list, link) {
+			/* output the memory usage and cap for each kctx
+			* opened on this device */
+			seq_printf(sfile, "  %s-0x%p %10u\n",
+				"kctx",
+				element->kctx,
+				atomic_read(&(element->kctx->used_pages)));
+		}
+		mutex_unlock(&kbdev->kctx_list_lock);
+	}
+	kbase_dev_list_put(kbdev_list);
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for gpu_memory
+ */
+static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_gpu_memory_seq_show, NULL);
+}
+
+static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
+	.open = kbasep_gpu_memory_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/*
+ *  Initialize debugfs entry for gpu_memory
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_file("gpu_memory", S_IRUGO,
+			kbdev->mali_debugfs_directory, NULL,
+			&kbasep_gpu_memory_debugfs_fops);
+	return;
+}
+
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+	return;
+}
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
new file mode 100644
index 0000000000000..28a871a0da4fd
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_gpu_memory_debugfs.h
+ * Header file for gpu_memory entry in debugfs
+ *
+ */
+
+#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H
+#define _KBASE_GPU_MEMORY_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Initialize gpu_memory debugfs entry
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev);
+
+#endif  /*_KBASE_GPU_MEMORY_DEBUGFS_H*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
new file mode 100644
index 0000000000000..82e4b4081df40
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
@@ -0,0 +1,482 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Base kernel property query APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gpuprops.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+#include "mali_kbase_ioctl.h"
+#include <linux/clk.h>
+
+/**
+ * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield.
+ * @value:  The value from which to extract bits.
+ * @offset: The first bit to extract (0 being the LSB).
+ * @size:   The number of bits to extract.
+ *
+ * Context: @offset + @size <= 32.
+ *
+ * Return: Bits [@offset, @offset + @size) from @value.
+ */
+/* from mali_cdsb.h */
+#define KBASE_UBFX32(value, offset, size) \
+	(((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1))
+
+static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
+{
+	struct mali_base_gpu_coherent_group *current_group;
+	u64 group_present;
+	u64 group_mask;
+	u64 first_set, first_set_prev;
+	u32 num_groups = 0;
+
+	KBASE_DEBUG_ASSERT(NULL != props);
+
+	props->coherency_info.coherency = props->raw_props.mem_features;
+	props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
+
+	if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
+		/* Group is l2 coherent */
+		group_present = props->raw_props.l2_present;
+	} else {
+		/* Group is l1 coherent */
+		group_present = props->raw_props.shader_present;
+	}
+
+	/*
+	 * The coherent group mask can be computed from the l2 present
+	 * register.
+	 *
+	 * For the coherent group n:
+	 * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
+	 * where first_set is group_present with only its nth set-bit kept
+	 * (i.e. the position from where a new group starts).
+	 *
+	 * For instance if the groups are l2 coherent and l2_present=0x0..01111:
+	 * The first mask is:
+	 * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
+	 *               = (0x0..010     - 1) & ~(0x0..01      - 1)
+	 *               =  0x0..00f
+	 * The second mask is:
+	 * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
+	 *               = (0x0..100     - 1) & ~(0x0..010     - 1)
+	 *               =  0x0..0f0
+	 * And so on until all the bits from group_present have been cleared
+	 * (i.e. there is no group left).
+	 */
+
+	current_group = props->coherency_info.group;
+	first_set = group_present & ~(group_present - 1);
+
+	while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
+		group_present -= first_set;	/* Clear the current group bit */
+		first_set_prev = first_set;
+
+		first_set = group_present & ~(group_present - 1);
+		group_mask = (first_set - 1) & ~(first_set_prev - 1);
+
+		/* Populate the coherent_group structure for each group */
+		current_group->core_mask = group_mask & props->raw_props.shader_present;
+		current_group->num_cores = hweight64(current_group->core_mask);
+
+		num_groups++;
+		current_group++;
+	}
+
+	if (group_present != 0)
+		pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
+
+	props->coherency_info.num_groups = num_groups;
+}
+
+/**
+ * kbase_gpuprops_get_props - Get the GPU configuration
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values from the GPU configuration
+ * registers. Only the raw properties are filled in this function
+ */
+static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	struct kbase_gpuprops_regdump regdump;
+	int i;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != gpu_props);
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get(kbdev, &regdump);
+
+	gpu_props->raw_props.gpu_id = regdump.gpu_id;
+	gpu_props->raw_props.tiler_features = regdump.tiler_features;
+	gpu_props->raw_props.mem_features = regdump.mem_features;
+	gpu_props->raw_props.mmu_features = regdump.mmu_features;
+	gpu_props->raw_props.l2_features = regdump.l2_features;
+	gpu_props->raw_props.core_features = regdump.core_features;
+
+	gpu_props->raw_props.as_present = regdump.as_present;
+	gpu_props->raw_props.js_present = regdump.js_present;
+	gpu_props->raw_props.shader_present =
+		((u64) regdump.shader_present_hi << 32) +
+		regdump.shader_present_lo;
+	gpu_props->raw_props.tiler_present =
+		((u64) regdump.tiler_present_hi << 32) +
+		regdump.tiler_present_lo;
+	gpu_props->raw_props.l2_present =
+		((u64) regdump.l2_present_hi << 32) +
+		regdump.l2_present_lo;
+	gpu_props->raw_props.stack_present =
+		((u64) regdump.stack_present_hi << 32) +
+		regdump.stack_present_lo;
+
+	for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+		gpu_props->raw_props.js_features[i] = regdump.js_features[i];
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
+
+	gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
+	gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
+	gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
+	gpu_props->raw_props.thread_features = regdump.thread_features;
+	gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc;
+}
+
+void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props)
+{
+	gpu_props->core_props.version_status =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
+	gpu_props->core_props.minor_revision =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
+	gpu_props->core_props.major_revision =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
+	gpu_props->core_props.product_id =
+		KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
+}
+
+/**
+ * kbase_gpuprops_calculate_props - Calculate the derived properties
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev:     The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values derived from the GPU
+ * configuration registers
+ */
+static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+	int i;
+
+	/* Populate the base_gpu_props structure */
+	kbase_gpuprops_update_core_props_gpu_id(gpu_props);
+	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
+	gpu_props->core_props.gpu_available_memory_size = totalram_pages() << PAGE_SHIFT;
+	gpu_props->core_props.num_exec_engines =
+		KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4);
+
+	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+		gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
+
+	gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
+	gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
+
+	/* Field with number of l2 slices is added to MEM_FEATURES register
+	 * since t76x. Below code assumes that for older GPU reserved bits will
+	 * be read as zero. */
+	gpu_props->l2_props.num_l2_slices =
+		KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
+
+	gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
+	gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
+
+	if (gpu_props->raw_props.thread_max_threads == 0)
+		gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
+	else
+		gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
+
+	if (gpu_props->raw_props.thread_max_workgroup_size == 0)
+		gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
+	else
+		gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
+
+	if (gpu_props->raw_props.thread_max_barrier_size == 0)
+		gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
+	else
+		gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
+
+	if (gpu_props->raw_props.thread_tls_alloc == 0)
+		gpu_props->thread_props.tls_alloc =
+				gpu_props->thread_props.max_threads;
+	else
+		gpu_props->thread_props.tls_alloc =
+				gpu_props->raw_props.thread_tls_alloc;
+
+	gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
+	gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
+	gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
+	gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
+
+	/* If values are not specified, then use defaults */
+	if (gpu_props->thread_props.max_registers == 0) {
+		gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
+		gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
+		gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
+	}
+	/* Initialize the coherent_group structure for each group */
+	kbase_gpuprops_construct_coherent_groups(gpu_props);
+}
+
+void kbase_gpuprops_set(struct kbase_device *kbdev)
+{
+	struct kbase_gpu_props *gpu_props;
+	struct gpu_raw_gpu_props *raw;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	gpu_props = &kbdev->gpu_props;
+	raw = &gpu_props->props.raw_props;
+
+	/* Initialize the base_gpu_props structure from the hardware */
+	kbase_gpuprops_get_props(&gpu_props->props, kbdev);
+
+	/* Populate the derived properties */
+	kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
+
+	/* Populate kbase-only fields */
+	gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
+	gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
+
+	gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
+
+	gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
+	gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
+
+	gpu_props->num_cores = hweight64(raw->shader_present);
+	gpu_props->num_core_groups = hweight64(raw->l2_present);
+	gpu_props->num_address_spaces = hweight32(raw->as_present);
+	gpu_props->num_job_slots = hweight32(raw->js_present);
+}
+
+void kbase_gpuprops_set_features(struct kbase_device *kbdev)
+{
+	base_gpu_props *gpu_props;
+	struct kbase_gpuprops_regdump regdump;
+
+	gpu_props = &kbdev->gpu_props.props;
+
+	/* Dump relevant registers */
+	kbase_backend_gpuprops_get_features(kbdev, &regdump);
+
+	/*
+	 * Copy the raw value from the register, later this will get turned
+	 * into the selected coherency mode.
+	 * Additionally, add non-coherent mode, as this is always supported.
+	 */
+	gpu_props->raw_props.coherency_mode = regdump.coherency_features |
+		COHERENCY_FEATURE_BIT(COHERENCY_NONE);
+
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT))
+		gpu_props->thread_props.max_thread_group_split = 0;
+}
+
+static struct {
+	u32 type;
+	size_t offset;
+	int size;
+} gpu_property_mapping[] = {
+#define PROP(name, member) \
+	{KBASE_GPUPROP_ ## name, offsetof(struct base_gpu_props, member), \
+		sizeof(((struct base_gpu_props *)0)->member)}
+	PROP(PRODUCT_ID,                  core_props.product_id),
+	PROP(VERSION_STATUS,              core_props.version_status),
+	PROP(MINOR_REVISION,              core_props.minor_revision),
+	PROP(MAJOR_REVISION,              core_props.major_revision),
+	PROP(GPU_FREQ_KHZ_MAX,            core_props.gpu_freq_khz_max),
+	PROP(LOG2_PROGRAM_COUNTER_SIZE,   core_props.log2_program_counter_size),
+	PROP(TEXTURE_FEATURES_0,          core_props.texture_features[0]),
+	PROP(TEXTURE_FEATURES_1,          core_props.texture_features[1]),
+	PROP(TEXTURE_FEATURES_2,          core_props.texture_features[2]),
+	PROP(TEXTURE_FEATURES_3,          core_props.texture_features[3]),
+	PROP(GPU_AVAILABLE_MEMORY_SIZE,   core_props.gpu_available_memory_size),
+	PROP(NUM_EXEC_ENGINES,            core_props.num_exec_engines),
+
+	PROP(L2_LOG2_LINE_SIZE,           l2_props.log2_line_size),
+	PROP(L2_LOG2_CACHE_SIZE,          l2_props.log2_cache_size),
+	PROP(L2_NUM_L2_SLICES,            l2_props.num_l2_slices),
+
+	PROP(TILER_BIN_SIZE_BYTES,        tiler_props.bin_size_bytes),
+	PROP(TILER_MAX_ACTIVE_LEVELS,     tiler_props.max_active_levels),
+
+	PROP(MAX_THREADS,                 thread_props.max_threads),
+	PROP(MAX_WORKGROUP_SIZE,          thread_props.max_workgroup_size),
+	PROP(MAX_BARRIER_SIZE,            thread_props.max_barrier_size),
+	PROP(MAX_REGISTERS,               thread_props.max_registers),
+	PROP(MAX_TASK_QUEUE,              thread_props.max_task_queue),
+	PROP(MAX_THREAD_GROUP_SPLIT,      thread_props.max_thread_group_split),
+	PROP(IMPL_TECH,                   thread_props.impl_tech),
+	PROP(TLS_ALLOC,                   thread_props.tls_alloc),
+
+	PROP(RAW_SHADER_PRESENT,          raw_props.shader_present),
+	PROP(RAW_TILER_PRESENT,           raw_props.tiler_present),
+	PROP(RAW_L2_PRESENT,              raw_props.l2_present),
+	PROP(RAW_STACK_PRESENT,           raw_props.stack_present),
+	PROP(RAW_L2_FEATURES,             raw_props.l2_features),
+	PROP(RAW_CORE_FEATURES,           raw_props.core_features),
+	PROP(RAW_MEM_FEATURES,            raw_props.mem_features),
+	PROP(RAW_MMU_FEATURES,            raw_props.mmu_features),
+	PROP(RAW_AS_PRESENT,              raw_props.as_present),
+	PROP(RAW_JS_PRESENT,              raw_props.js_present),
+	PROP(RAW_JS_FEATURES_0,           raw_props.js_features[0]),
+	PROP(RAW_JS_FEATURES_1,           raw_props.js_features[1]),
+	PROP(RAW_JS_FEATURES_2,           raw_props.js_features[2]),
+	PROP(RAW_JS_FEATURES_3,           raw_props.js_features[3]),
+	PROP(RAW_JS_FEATURES_4,           raw_props.js_features[4]),
+	PROP(RAW_JS_FEATURES_5,           raw_props.js_features[5]),
+	PROP(RAW_JS_FEATURES_6,           raw_props.js_features[6]),
+	PROP(RAW_JS_FEATURES_7,           raw_props.js_features[7]),
+	PROP(RAW_JS_FEATURES_8,           raw_props.js_features[8]),
+	PROP(RAW_JS_FEATURES_9,           raw_props.js_features[9]),
+	PROP(RAW_JS_FEATURES_10,          raw_props.js_features[10]),
+	PROP(RAW_JS_FEATURES_11,          raw_props.js_features[11]),
+	PROP(RAW_JS_FEATURES_12,          raw_props.js_features[12]),
+	PROP(RAW_JS_FEATURES_13,          raw_props.js_features[13]),
+	PROP(RAW_JS_FEATURES_14,          raw_props.js_features[14]),
+	PROP(RAW_JS_FEATURES_15,          raw_props.js_features[15]),
+	PROP(RAW_TILER_FEATURES,          raw_props.tiler_features),
+	PROP(RAW_TEXTURE_FEATURES_0,      raw_props.texture_features[0]),
+	PROP(RAW_TEXTURE_FEATURES_1,      raw_props.texture_features[1]),
+	PROP(RAW_TEXTURE_FEATURES_2,      raw_props.texture_features[2]),
+	PROP(RAW_TEXTURE_FEATURES_3,      raw_props.texture_features[3]),
+	PROP(RAW_GPU_ID,                  raw_props.gpu_id),
+	PROP(RAW_THREAD_MAX_THREADS,      raw_props.thread_max_threads),
+	PROP(RAW_THREAD_MAX_WORKGROUP_SIZE,
+			raw_props.thread_max_workgroup_size),
+	PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size),
+	PROP(RAW_THREAD_FEATURES,         raw_props.thread_features),
+	PROP(RAW_THREAD_TLS_ALLOC,        raw_props.thread_tls_alloc),
+	PROP(RAW_COHERENCY_MODE,          raw_props.coherency_mode),
+
+	PROP(COHERENCY_NUM_GROUPS,        coherency_info.num_groups),
+	PROP(COHERENCY_NUM_CORE_GROUPS,   coherency_info.num_core_groups),
+	PROP(COHERENCY_COHERENCY,         coherency_info.coherency),
+	PROP(COHERENCY_GROUP_0,           coherency_info.group[0].core_mask),
+	PROP(COHERENCY_GROUP_1,           coherency_info.group[1].core_mask),
+	PROP(COHERENCY_GROUP_2,           coherency_info.group[2].core_mask),
+	PROP(COHERENCY_GROUP_3,           coherency_info.group[3].core_mask),
+	PROP(COHERENCY_GROUP_4,           coherency_info.group[4].core_mask),
+	PROP(COHERENCY_GROUP_5,           coherency_info.group[5].core_mask),
+	PROP(COHERENCY_GROUP_6,           coherency_info.group[6].core_mask),
+	PROP(COHERENCY_GROUP_7,           coherency_info.group[7].core_mask),
+	PROP(COHERENCY_GROUP_8,           coherency_info.group[8].core_mask),
+	PROP(COHERENCY_GROUP_9,           coherency_info.group[9].core_mask),
+	PROP(COHERENCY_GROUP_10,          coherency_info.group[10].core_mask),
+	PROP(COHERENCY_GROUP_11,          coherency_info.group[11].core_mask),
+	PROP(COHERENCY_GROUP_12,          coherency_info.group[12].core_mask),
+	PROP(COHERENCY_GROUP_13,          coherency_info.group[13].core_mask),
+	PROP(COHERENCY_GROUP_14,          coherency_info.group[14].core_mask),
+	PROP(COHERENCY_GROUP_15,          coherency_info.group[15].core_mask),
+
+#undef PROP
+};
+
+int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev)
+{
+	struct kbase_gpu_props *kprops = &kbdev->gpu_props;
+	struct base_gpu_props *props = &kprops->props;
+	u32 count = ARRAY_SIZE(gpu_property_mapping);
+	u32 i;
+	u32 size = 0;
+	u8 *p;
+
+	for (i = 0; i < count; i++) {
+		/* 4 bytes for the ID, and the size of the property */
+		size += 4 + gpu_property_mapping[i].size;
+	}
+
+	kprops->prop_buffer_size = size;
+	kprops->prop_buffer = kmalloc(size, GFP_KERNEL);
+
+	if (!kprops->prop_buffer) {
+		kprops->prop_buffer_size = 0;
+		return -ENOMEM;
+	}
+
+	p = kprops->prop_buffer;
+
+#define WRITE_U8(v) (*p++ = (v) & 0xFF)
+#define WRITE_U16(v) do { WRITE_U8(v); WRITE_U8((v) >> 8); } while (0)
+#define WRITE_U32(v) do { WRITE_U16(v); WRITE_U16((v) >> 16); } while (0)
+#define WRITE_U64(v) do { WRITE_U32(v); WRITE_U32((v) >> 32); } while (0)
+
+	for (i = 0; i < count; i++) {
+		u32 type = gpu_property_mapping[i].type;
+		u8 type_size;
+		void *field = ((u8 *)props) + gpu_property_mapping[i].offset;
+
+		switch (gpu_property_mapping[i].size) {
+		case 1:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U8;
+			break;
+		case 2:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U16;
+			break;
+		case 4:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U32;
+			break;
+		case 8:
+			type_size = KBASE_GPUPROP_VALUE_SIZE_U64;
+			break;
+		default:
+			dev_err(kbdev->dev,
+				"Invalid gpu_property_mapping type=%d size=%d",
+				type, gpu_property_mapping[i].size);
+			return -EINVAL;
+		}
+
+		WRITE_U32((type<<2) | type_size);
+
+		switch (type_size) {
+		case KBASE_GPUPROP_VALUE_SIZE_U8:
+			WRITE_U8(*((u8 *)field));
+			break;
+		case KBASE_GPUPROP_VALUE_SIZE_U16:
+			WRITE_U16(*((u16 *)field));
+			break;
+		case KBASE_GPUPROP_VALUE_SIZE_U32:
+			WRITE_U32(*((u32 *)field));
+			break;
+		case KBASE_GPUPROP_VALUE_SIZE_U64:
+			WRITE_U64(*((u64 *)field));
+			break;
+		default: /* Cannot be reached */
+			WARN_ON(1);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
new file mode 100644
index 0000000000000..37d9c08770bf8
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
@@ -0,0 +1,77 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015,2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_gpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_H_
+#define _KBASE_GPUPROPS_H_
+
+#include "mali_kbase_gpuprops_types.h"
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/**
+ * @brief Set up Kbase GPU properties.
+ *
+ * Set up Kbase GPU properties with information from the GPU registers
+ *
+ * @param kbdev		The struct kbase_device structure for the device
+ */
+void kbase_gpuprops_set(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_set_features - Set up Kbase GPU properties
+ * @kbdev:   Device pointer
+ *
+ * This function sets up GPU properties that are dependent on the hardware
+ * features bitmask. This function must be preceeded by a call to
+ * kbase_hw_set_features_mask().
+ */
+void kbase_gpuprops_set_features(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_populate_user_buffer - Populate the GPU properties buffer
+ * @kbdev: The kbase device
+ *
+ * Fills kbdev->gpu_props->prop_buffer with the GPU properties for user
+ * space to read.
+ */
+int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpuprops_update_core_props_gpu_id - break down gpu id value
+ * @gpu_props: the &base_gpu_props structure
+ *
+ * Break down gpu_id value stored in base_gpu_props::raw_props.gpu_id into
+ * separate fields (version_status, minor_revision, major_revision, product_id)
+ * stored in base_gpu_props::core_props.
+ */
+void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props);
+
+
+#endif				/* _KBASE_GPUPROPS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
new file mode 100644
index 0000000000000..d7877d1d4a576
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
@@ -0,0 +1,98 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_gpuprops_types.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_TYPES_H_
+#define _KBASE_GPUPROPS_TYPES_H_
+
+#include "mali_base_kernel.h"
+
+#define KBASE_GPU_SPEED_MHZ    123
+#define KBASE_GPU_PC_SIZE_LOG2 24U
+
+struct kbase_gpuprops_regdump {
+	u32 gpu_id;
+	u32 l2_features;
+	u32 core_features;
+	u32 tiler_features;
+	u32 mem_features;
+	u32 mmu_features;
+	u32 as_present;
+	u32 js_present;
+	u32 thread_max_threads;
+	u32 thread_max_workgroup_size;
+	u32 thread_max_barrier_size;
+	u32 thread_features;
+	u32 thread_tls_alloc;
+	u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	u32 js_features[GPU_MAX_JOB_SLOTS];
+	u32 shader_present_lo;
+	u32 shader_present_hi;
+	u32 tiler_present_lo;
+	u32 tiler_present_hi;
+	u32 l2_present_lo;
+	u32 l2_present_hi;
+	u32 stack_present_lo;
+	u32 stack_present_hi;
+	u32 coherency_features;
+};
+
+struct kbase_gpu_cache_props {
+	u8 associativity;
+	u8 external_bus_width;
+};
+
+struct kbase_gpu_mem_props {
+	u8 core_group;
+};
+
+struct kbase_gpu_mmu_props {
+	u8 va_bits;
+	u8 pa_bits;
+};
+
+struct kbase_gpu_props {
+	/* kernel-only properties */
+	u8 num_cores;
+	u8 num_core_groups;
+	u8 num_address_spaces;
+	u8 num_job_slots;
+
+	struct kbase_gpu_cache_props l2_props;
+
+	struct kbase_gpu_mem_props mem;
+	struct kbase_gpu_mmu_props mmu;
+
+	/* Properties shared with userspace */
+	base_gpu_props props;
+
+	u32 prop_buffer_size;
+	void *prop_buffer;
+};
+
+#endif				/* _KBASE_GPUPROPS_TYPES_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gwt.c b/drivers/gpu/arm/midgard/mali_kbase_gwt.c
new file mode 100644
index 0000000000000..0481f80fec752
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gwt.c
@@ -0,0 +1,268 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_gwt.h"
+#include <linux/list_sort.h>
+
+static inline void kbase_gpu_gwt_setup_page_permission(
+				struct kbase_context *kctx,
+				unsigned long flag,
+				struct rb_node *node)
+{
+	struct rb_node *rbnode = node;
+
+	while (rbnode) {
+		struct kbase_va_region *reg;
+		int err = 0;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if (reg->nr_pages && !(reg->flags & KBASE_REG_FREE) &&
+					(reg->flags & KBASE_REG_GPU_WR)) {
+			err = kbase_mmu_update_pages(kctx, reg->start_pfn,
+					kbase_get_gpu_phy_pages(reg),
+					reg->gpu_alloc->nents,
+					reg->flags & flag);
+			if (err)
+				dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages failure\n");
+		}
+
+		rbnode = rb_next(rbnode);
+	}
+}
+
+static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx,
+					unsigned long flag)
+{
+	kbase_gpu_gwt_setup_page_permission(kctx, flag,
+				rb_first(&(kctx->reg_rbtree_same)));
+	kbase_gpu_gwt_setup_page_permission(kctx, flag,
+				rb_first(&(kctx->reg_rbtree_custom)));
+}
+
+
+int kbase_gpu_gwt_start(struct kbase_context *kctx)
+{
+	kbase_gpu_vm_lock(kctx);
+	if (kctx->gwt_enabled) {
+		kbase_gpu_vm_unlock(kctx);
+		return -EBUSY;
+	}
+
+	INIT_LIST_HEAD(&kctx->gwt_current_list);
+	INIT_LIST_HEAD(&kctx->gwt_snapshot_list);
+
+	/* If GWT is enabled using new vector dumping format
+	 * from user space, back up status of the job serialization flag and
+	 * use full serialisation of jobs for dumping.
+	 * Status will be restored on end of dumping in gwt_stop.
+	 */
+	kctx->kbdev->backup_serialize_jobs = kctx->kbdev->serialize_jobs;
+	kctx->kbdev->serialize_jobs = KBASE_SERIALIZE_INTRA_SLOT |
+						KBASE_SERIALIZE_INTER_SLOT;
+
+	/* Mark gwt enabled before making pages read only in case a
+	   write page fault is triggered while we're still in this loop.
+	   (kbase_gpu_vm_lock() doesn't prevent this!)
+	*/
+	kctx->gwt_enabled = true;
+	kctx->gwt_was_enabled = true;
+
+	kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR);
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+}
+
+int kbase_gpu_gwt_stop(struct kbase_context *kctx)
+{
+	struct kbasep_gwt_list_element *pos, *n;
+
+	kbase_gpu_vm_lock(kctx);
+	if (!kctx->gwt_enabled) {
+		kbase_gpu_vm_unlock(kctx);
+		return -EINVAL;
+	}
+
+	list_for_each_entry_safe(pos, n, &kctx->gwt_current_list, link) {
+		list_del(&pos->link);
+		kfree(pos);
+	}
+
+	list_for_each_entry_safe(pos, n, &kctx->gwt_snapshot_list, link) {
+		list_del(&pos->link);
+		kfree(pos);
+	}
+
+	kctx->kbdev->serialize_jobs = kctx->kbdev->backup_serialize_jobs;
+
+	kbase_gpu_gwt_setup_pages(kctx, ~0UL);
+
+	kctx->gwt_enabled = false;
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+}
+
+
+static int list_cmp_function(void *priv, struct list_head *a,
+				struct list_head *b)
+{
+	struct kbasep_gwt_list_element *elementA = container_of(a,
+				struct kbasep_gwt_list_element, link);
+	struct kbasep_gwt_list_element *elementB = container_of(b,
+				struct kbasep_gwt_list_element, link);
+
+	CSTD_UNUSED(priv);
+
+	if (elementA->page_addr > elementB->page_addr)
+		return 1;
+	return -1;
+}
+
+static void kbase_gpu_gwt_collate(struct kbase_context *kctx,
+		struct list_head *snapshot_list)
+{
+	struct kbasep_gwt_list_element *pos, *n;
+	struct kbasep_gwt_list_element *collated = NULL;
+
+	/* Sort the list */
+	list_sort(NULL, snapshot_list, list_cmp_function);
+
+	/* Combine contiguous areas. */
+	list_for_each_entry_safe(pos, n, snapshot_list, link) {
+		if (collated == NULL ||	collated->region !=
+					pos->region ||
+					(collated->page_addr +
+					(collated->num_pages * PAGE_SIZE)) !=
+					pos->page_addr) {
+			/* This is the first time through, a new region or
+			 * is not contiguous - start collating to this element
+			 */
+			collated = pos;
+		} else {
+			/* contiguous so merge */
+			collated->num_pages += pos->num_pages;
+			/* remove element from list */
+			list_del(&pos->link);
+			kfree(pos);
+		}
+	}
+}
+
+int kbase_gpu_gwt_dump(struct kbase_context *kctx,
+			union kbase_ioctl_cinstr_gwt_dump *gwt_dump)
+{
+	const u32 ubuf_size = gwt_dump->in.len;
+	u32 ubuf_count = 0;
+	__user void *user_addr = (__user void *)
+			(uintptr_t)gwt_dump->in.addr_buffer;
+	__user void *user_sizes = (__user void *)
+			(uintptr_t)gwt_dump->in.size_buffer;
+
+	kbase_gpu_vm_lock(kctx);
+
+	if (!kctx->gwt_enabled) {
+		kbase_gpu_vm_unlock(kctx);
+		/* gwt_dump shouldn't be called when gwt is disabled */
+		return -EPERM;
+	}
+
+	if (!gwt_dump->in.len || !gwt_dump->in.addr_buffer
+			|| !gwt_dump->in.size_buffer) {
+		kbase_gpu_vm_unlock(kctx);
+		/* We don't have any valid user space buffer to copy the
+		 * write modified addresses.
+		 */
+		return -EINVAL;
+	}
+
+	if (list_empty(&kctx->gwt_snapshot_list) &&
+			!list_empty(&kctx->gwt_current_list)) {
+
+		list_replace_init(&kctx->gwt_current_list,
+					&kctx->gwt_snapshot_list);
+
+		/* We have collected all write faults so far
+		 * and they will be passed on to user space.
+		 * Reset the page flags state to allow collection of
+		 * further write faults.
+		 */
+		kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR);
+
+		/* Sort and combine consecutive pages in the dump list*/
+		kbase_gpu_gwt_collate(kctx, &kctx->gwt_snapshot_list);
+	}
+
+	while ((!list_empty(&kctx->gwt_snapshot_list))) {
+		u64 addr_buffer[32];
+		u64 num_page_buffer[32];
+		u32 count = 0;
+		int err;
+		struct kbasep_gwt_list_element *dump_info, *n;
+
+		list_for_each_entry_safe(dump_info, n,
+				&kctx->gwt_snapshot_list, link) {
+			addr_buffer[count] = dump_info->page_addr;
+			num_page_buffer[count] = dump_info->num_pages;
+			count++;
+			list_del(&dump_info->link);
+			kfree(dump_info);
+			if (ARRAY_SIZE(addr_buffer) == count ||
+					ubuf_size == (ubuf_count + count))
+				break;
+		}
+
+		if (count) {
+			err = copy_to_user((user_addr +
+					(ubuf_count * sizeof(u64))),
+					(void *)addr_buffer,
+					count * sizeof(u64));
+			if (err) {
+				dev_err(kctx->kbdev->dev, "Copy to user failure\n");
+				kbase_gpu_vm_unlock(kctx);
+				return err;
+			}
+			err = copy_to_user((user_sizes +
+					(ubuf_count * sizeof(u64))),
+					(void *)num_page_buffer,
+					count * sizeof(u64));
+			if (err) {
+				dev_err(kctx->kbdev->dev, "Copy to user failure\n");
+				kbase_gpu_vm_unlock(kctx);
+				return err;
+			}
+
+			ubuf_count += count;
+		}
+
+		if (ubuf_count == ubuf_size)
+			break;
+	}
+
+	if (!list_empty(&kctx->gwt_snapshot_list))
+		gwt_dump->out.more_data_available = 1;
+	else
+		gwt_dump->out.more_data_available = 0;
+
+	gwt_dump->out.no_of_addr_collected = ubuf_count;
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gwt.h b/drivers/gpu/arm/midgard/mali_kbase_gwt.h
new file mode 100644
index 0000000000000..7e7746e649154
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gwt.h
@@ -0,0 +1,55 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#if !defined(_KBASE_GWT_H)
+#define _KBASE_GWT_H
+
+#include <mali_kbase.h>
+#include <mali_kbase_ioctl.h>
+
+/**
+ * kbase_gpu_gwt_start - Start the GPU write tracking
+ * @kctx: Pointer to kernel context
+ *
+ * @return 0 on success, error on failure.
+ */
+int kbase_gpu_gwt_start(struct kbase_context *kctx);
+
+/**
+ * kbase_gpu_gwt_stop - Stop the GPU write tracking
+ * @kctx: Pointer to kernel context
+ *
+ * @return 0 on success, error on failure.
+ */
+int kbase_gpu_gwt_stop(struct kbase_context *kctx);
+
+/**
+ * kbase_gpu_gwt_dump - Pass page address of faulting addresses to user space.
+ * @kctx:	Pointer to kernel context
+ * @gwt_dump:	User space data to be passed.
+ *
+ * @return 0 on success, error on failure.
+ */
+int kbase_gpu_gwt_dump(struct kbase_context *kctx,
+			union kbase_ioctl_cinstr_gwt_dump *gwt_dump);
+
+#endif /* _KBASE_GWT_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.c b/drivers/gpu/arm/midgard/mali_kbase_hw.c
new file mode 100644
index 0000000000000..f34f53a919b8d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hw.c
@@ -0,0 +1,498 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Run-time work-arounds helpers
+ */
+
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_midg_regmap.h>
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+
+void kbase_hw_set_features_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_feature *features;
+	u32 gpu_id;
+	u32 product_id;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+		case GPU_ID2_PRODUCT_TMIX:
+			features = base_hw_features_tMIx;
+			break;
+		case GPU_ID2_PRODUCT_THEX:
+			features = base_hw_features_tHEx;
+			break;
+		case GPU_ID2_PRODUCT_TSIX:
+			features = base_hw_features_tSIx;
+			break;
+		case GPU_ID2_PRODUCT_TDVX:
+			features = base_hw_features_tDVx;
+			break;
+		case GPU_ID2_PRODUCT_TNOX:
+			features = base_hw_features_tNOx;
+			break;
+		case GPU_ID2_PRODUCT_TGOX:
+			features = base_hw_features_tGOx;
+			break;
+		case GPU_ID2_PRODUCT_TKAX:
+			features = base_hw_features_tKAx;
+			break;
+		case GPU_ID2_PRODUCT_TTRX:
+			features = base_hw_features_tTRx;
+			break;
+		case GPU_ID2_PRODUCT_TBOX:
+			features = base_hw_features_tBOx;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	} else {
+		switch (product_id) {
+		case GPU_ID_PI_TFRX:
+			/* FALLTHROUGH */
+		case GPU_ID_PI_T86X:
+			features = base_hw_features_tFxx;
+			break;
+		case GPU_ID_PI_T83X:
+			features = base_hw_features_t83x;
+			break;
+		case GPU_ID_PI_T82X:
+			features = base_hw_features_t82x;
+			break;
+		case GPU_ID_PI_T76X:
+			features = base_hw_features_t76x;
+			break;
+		case GPU_ID_PI_T72X:
+			features = base_hw_features_t72x;
+			break;
+		case GPU_ID_PI_T62X:
+			features = base_hw_features_t62x;
+			break;
+		case GPU_ID_PI_T60X:
+			features = base_hw_features_t60x;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	}
+
+	for (; *features != BASE_HW_FEATURE_END; features++)
+		set_bit(*features, &kbdev->hw_features_mask[0]);
+}
+
+/**
+ * kbase_hw_get_issues_for_new_id - Get the hardware issues for a new GPU ID
+ * @kbdev: Device pointer
+ *
+ * Return: pointer to an array of hardware issues, terminated by
+ * BASE_HW_ISSUE_END.
+ *
+ * This function can only be used on new-format GPU IDs, i.e. those for which
+ * GPU_ID_IS_NEW_FORMAT evaluates as true. The GPU ID is read from the @kbdev.
+ *
+ * In debugging versions of the driver, unknown versions of a known GPU will
+ * be treated as the most recent known version not later than the actual
+ * version. In such circumstances, the GPU ID in @kbdev will also be replaced
+ * with the most recent known version.
+ *
+ * Note: The GPU configuration must have been read by kbase_gpuprops_get_props()
+ * before calling this function.
+ */
+static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
+					struct kbase_device *kbdev)
+{
+	const enum base_hw_issue *issues = NULL;
+
+	struct base_hw_product {
+		u32 product_model;
+		struct {
+			u32 version;
+			const enum base_hw_issue *issues;
+		} map[7];
+	};
+
+	static const struct base_hw_product base_hw_products[] = {
+		{GPU_ID2_PRODUCT_TMIX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 1),
+		   base_hw_issues_tMIx_r0p0_05dev0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1},
+		  {U32_MAX /* sentinel value */, NULL} } },
+
+		{GPU_ID2_PRODUCT_THEX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2},
+		  {GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TSIX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0},
+		  {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TDVX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TNOX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TGOX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TKAX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tKAx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TTRX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TBOX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBOx_r0p0},
+		  {U32_MAX, NULL} } },
+	};
+
+	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	const u32 product_model = gpu_id & GPU_ID2_PRODUCT_MODEL;
+	const struct base_hw_product *product = NULL;
+	size_t p;
+
+	/* Stop when we reach the end of the products array. */
+	for (p = 0; p < ARRAY_SIZE(base_hw_products); ++p) {
+		if (product_model == base_hw_products[p].product_model) {
+			product = &base_hw_products[p];
+			break;
+		}
+	}
+
+	if (product != NULL) {
+		/* Found a matching product. */
+		const u32 version = gpu_id & GPU_ID2_VERSION;
+#if !MALI_CUSTOMER_RELEASE
+		u32 fallback_version = 0;
+		const enum base_hw_issue *fallback_issues = NULL;
+#endif
+		size_t v;
+
+		/* Stop when we reach the end of the map. */
+		for (v = 0; product->map[v].version != U32_MAX; ++v) {
+
+			if (version == product->map[v].version) {
+				/* Exact match so stop. */
+				issues = product->map[v].issues;
+				break;
+			}
+
+#if !MALI_CUSTOMER_RELEASE
+			/* Check whether this is a candidate for most recent
+				known version not later than the actual
+				version. */
+			if ((version > product->map[v].version) &&
+				(product->map[v].version >= fallback_version)) {
+				fallback_version = product->map[v].version;
+				fallback_issues = product->map[v].issues;
+			}
+#endif
+		}
+
+#if !MALI_CUSTOMER_RELEASE
+		if ((issues == NULL) && (fallback_issues != NULL)) {
+			/* Fall back to the issue set of the most recent known
+				version not later than the actual version. */
+			issues = fallback_issues;
+
+			dev_info(kbdev->dev,
+				"r%dp%d status %d is unknown; treating as r%dp%d status %d",
+				(gpu_id & GPU_ID2_VERSION_MAJOR) >>
+					GPU_ID2_VERSION_MAJOR_SHIFT,
+				(gpu_id & GPU_ID2_VERSION_MINOR) >>
+					GPU_ID2_VERSION_MINOR_SHIFT,
+				(gpu_id & GPU_ID2_VERSION_STATUS) >>
+					GPU_ID2_VERSION_STATUS_SHIFT,
+				(fallback_version & GPU_ID2_VERSION_MAJOR) >>
+					GPU_ID2_VERSION_MAJOR_SHIFT,
+				(fallback_version & GPU_ID2_VERSION_MINOR) >>
+					GPU_ID2_VERSION_MINOR_SHIFT,
+				(fallback_version & GPU_ID2_VERSION_STATUS) >>
+					GPU_ID2_VERSION_STATUS_SHIFT);
+
+			gpu_id &= ~GPU_ID2_VERSION;
+			gpu_id |= fallback_version;
+			kbdev->gpu_props.props.raw_props.gpu_id = gpu_id;
+
+			kbase_gpuprops_update_core_props_gpu_id(
+				&kbdev->gpu_props.props);
+		}
+#endif
+	}
+	return issues;
+}
+
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
+{
+	const enum base_hw_issue *issues;
+	u32 gpu_id;
+	u32 product_id;
+	u32 impl_tech;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	impl_tech = kbdev->gpu_props.props.thread_props.impl_tech;
+
+	if (impl_tech != IMPLEMENTATION_MODEL) {
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			issues = kbase_hw_get_issues_for_new_id(kbdev);
+			if (issues == NULL) {
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+
+#if !MALI_CUSTOMER_RELEASE
+			/* The GPU ID might have been replaced with the last
+			   known version of the same GPU. */
+			gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+#endif
+
+		} else {
+			switch (gpu_id) {
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
+				issues = base_hw_issues_t60x_r0p0_15dev0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC):
+				issues = base_hw_issues_t60x_r0p0_eac;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0):
+				issues = base_hw_issues_t60x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0):
+				issues = base_hw_issues_t62x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1):
+				issues = base_hw_issues_t62x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0):
+				issues = base_hw_issues_t62x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1):
+				issues = base_hw_issues_t76x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1):
+				issues = base_hw_issues_t76x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9):
+				issues = base_hw_issues_t76x_r0p1_50rel0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1):
+				issues = base_hw_issues_t76x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1):
+				issues = base_hw_issues_t76x_r0p3;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0):
+				issues = base_hw_issues_t76x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1):
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2):
+				issues = base_hw_issues_t72x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0):
+				issues = base_hw_issues_t72x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0):
+				issues = base_hw_issues_t72x_r1p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2):
+				issues = base_hw_issues_tFRx_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0):
+				issues = base_hw_issues_tFRx_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8):
+				issues = base_hw_issues_tFRx_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0):
+				issues = base_hw_issues_tFRx_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0):
+				issues = base_hw_issues_t86x_r0p2;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8):
+				issues = base_hw_issues_t86x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0):
+				issues = base_hw_issues_t86x_r2p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0):
+				issues = base_hw_issues_t83x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8):
+				issues = base_hw_issues_t83x_r1p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0):
+				issues = base_hw_issues_t82x_r0p0;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0):
+				issues = base_hw_issues_t82x_r0p1;
+				break;
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0):
+			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8):
+				issues = base_hw_issues_t82x_r1p0;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		}
+	} else {
+		/* Software model */
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+			case GPU_ID2_PRODUCT_TMIX:
+				issues = base_hw_issues_model_tMIx;
+				break;
+			case GPU_ID2_PRODUCT_THEX:
+				issues = base_hw_issues_model_tHEx;
+				break;
+			case GPU_ID2_PRODUCT_TSIX:
+				issues = base_hw_issues_model_tSIx;
+				break;
+			case GPU_ID2_PRODUCT_TDVX:
+				issues = base_hw_issues_model_tDVx;
+				break;
+			case GPU_ID2_PRODUCT_TNOX:
+				issues = base_hw_issues_model_tNOx;
+				break;
+			case GPU_ID2_PRODUCT_TGOX:
+				issues = base_hw_issues_model_tGOx;
+				break;
+			case GPU_ID2_PRODUCT_TKAX:
+				issues = base_hw_issues_model_tKAx;
+				break;
+			case GPU_ID2_PRODUCT_TTRX:
+				issues = base_hw_issues_model_tTRx;
+				break;
+			case GPU_ID2_PRODUCT_TBOX:
+				issues = base_hw_issues_model_tBOx;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		} else {
+			switch (product_id) {
+			case GPU_ID_PI_T60X:
+				issues = base_hw_issues_model_t60x;
+				break;
+			case GPU_ID_PI_T62X:
+				issues = base_hw_issues_model_t62x;
+				break;
+			case GPU_ID_PI_T72X:
+				issues = base_hw_issues_model_t72x;
+				break;
+			case GPU_ID_PI_T76X:
+				issues = base_hw_issues_model_t76x;
+				break;
+			case GPU_ID_PI_TFRX:
+				issues = base_hw_issues_model_tFRx;
+				break;
+			case GPU_ID_PI_T86X:
+				issues = base_hw_issues_model_t86x;
+				break;
+			case GPU_ID_PI_T83X:
+				issues = base_hw_issues_model_t83x;
+				break;
+			case GPU_ID_PI_T82X:
+				issues = base_hw_issues_model_t82x;
+				break;
+			default:
+				dev_err(kbdev->dev, "Unknown GPU ID %x",
+					gpu_id);
+				return -EINVAL;
+			}
+		}
+	}
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		dev_info(kbdev->dev,
+			"GPU identified as 0x%x arch %d.%d.%d r%dp%d status %d",
+			(gpu_id & GPU_ID2_PRODUCT_MAJOR) >>
+				GPU_ID2_PRODUCT_MAJOR_SHIFT,
+			(gpu_id & GPU_ID2_ARCH_MAJOR) >>
+				GPU_ID2_ARCH_MAJOR_SHIFT,
+			(gpu_id & GPU_ID2_ARCH_MINOR) >>
+				GPU_ID2_ARCH_MINOR_SHIFT,
+			(gpu_id & GPU_ID2_ARCH_REV) >>
+				GPU_ID2_ARCH_REV_SHIFT,
+			(gpu_id & GPU_ID2_VERSION_MAJOR) >>
+				GPU_ID2_VERSION_MAJOR_SHIFT,
+			(gpu_id & GPU_ID2_VERSION_MINOR) >>
+				GPU_ID2_VERSION_MINOR_SHIFT,
+			(gpu_id & GPU_ID2_VERSION_STATUS) >>
+				GPU_ID2_VERSION_STATUS_SHIFT);
+	} else {
+		dev_info(kbdev->dev,
+			"GPU identified as 0x%04x r%dp%d status %d",
+			(gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+				GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+			(gpu_id & GPU_ID_VERSION_MAJOR) >>
+				GPU_ID_VERSION_MAJOR_SHIFT,
+			(gpu_id & GPU_ID_VERSION_MINOR) >>
+				GPU_ID_VERSION_MINOR_SHIFT,
+			(gpu_id & GPU_ID_VERSION_STATUS) >>
+				GPU_ID_VERSION_STATUS_SHIFT);
+	}
+
+	for (; *issues != BASE_HW_ISSUE_END; issues++)
+		set_bit(*issues, &kbdev->hw_issues_mask[0]);
+
+	return 0;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.h b/drivers/gpu/arm/midgard/mali_kbase_hw.h
new file mode 100644
index 0000000000000..f386b1624317e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hw.h
@@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file
+ * Run-time work-arounds helpers
+ */
+
+#ifndef _KBASE_HW_H_
+#define _KBASE_HW_H_
+
+#include "mali_kbase_defs.h"
+
+/**
+ * @brief Tell whether a work-around should be enabled
+ */
+#define kbase_hw_has_issue(kbdev, issue)\
+	test_bit(issue, &(kbdev)->hw_issues_mask[0])
+
+/**
+ * @brief Tell whether a feature is supported
+ */
+#define kbase_hw_has_feature(kbdev, feature)\
+	test_bit(feature, &(kbdev)->hw_features_mask[0])
+
+/**
+ * kbase_hw_set_issues_mask - Set the hardware issues mask based on the GPU ID
+ * @kbdev: Device pointer
+ *
+ * Return: 0 if the GPU ID was recognized, otherwise -EINVAL.
+ *
+ * The GPU ID is read from the @kbdev.
+ *
+ * In debugging versions of the driver, unknown versions of a known GPU with a
+ * new-format ID will be treated as the most recent known version not later
+ * than the actual version. In such circumstances, the GPU ID in @kbdev will
+ * also be replaced with the most recent known version.
+ *
+ * Note: The GPU configuration must have been read by
+ * kbase_gpuprops_get_props() before calling this function.
+ */
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev);
+
+/**
+ * @brief Set the features mask depending on the GPU ID
+ */
+void kbase_hw_set_features_mask(struct kbase_device *kbdev);
+
+#endif				/* _KBASE_HW_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
new file mode 100644
index 0000000000000..dde4965c426a5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
@@ -0,0 +1,59 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * HW access backend common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_BACKEND_H_
+#define _KBASE_HWACCESS_BACKEND_H_
+
+/**
+ * kbase_backend_early_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_early_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_init - Perform any backend-specific initialization.
+ * @kbdev:	Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_late_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_early_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_early_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_term - Perform any backend-specific termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_late_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_BACKEND_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
new file mode 100644
index 0000000000000..124a2d9cf0c31
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
@@ -0,0 +1,51 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/**
+ * @file mali_kbase_hwaccess_gpu_defs.h
+ * HW access common definitions
+ */
+
+#ifndef _KBASE_HWACCESS_DEFS_H_
+#define _KBASE_HWACCESS_DEFS_H_
+
+#include <mali_kbase_jm_defs.h>
+
+/**
+ * struct kbase_hwaccess_data - object encapsulating the GPU backend specific
+ *                              data for the HW access layer.
+ *                              hwaccess_lock (a spinlock) must be held when
+ *                              accessing this structure.
+ * @active_kctx:     pointer to active kbase context which last submitted an
+ *                   atom to GPU and while the context is active it can
+ *                   submit new atoms to GPU from the irq context also, without
+ *                   going through the bottom half of job completion path.
+ * @backend:         GPU backend specific data for HW access layer
+ */
+struct kbase_hwaccess_data {
+	struct kbase_context *active_kctx[BASE_JM_MAX_NR_SLOTS];
+
+	struct kbase_backend_data backend;
+};
+
+#endif /* _KBASE_HWACCESS_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
new file mode 100644
index 0000000000000..63844d97ce021
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/**
+ * Base kernel property query backend APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPUPROPS_H_
+#define _KBASE_HWACCESS_GPUPROPS_H_
+
+/**
+ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from
+ *				  GPU
+ * @kbdev:	Device pointer
+ * @regdump:	Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * The caller should ensure that GPU remains powered-on during this function.
+ */
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+/**
+ * kbase_backend_gpuprops_get - Fill @regdump with GPU properties read from GPU
+ * @kbdev:   Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * This function reads GPU properties that are dependent on the hardware
+ * features bitmask. It will power-on the GPU if required.
+ */
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
+
+#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
new file mode 100644
index 0000000000000..0c5ceffb0e47a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
@@ -0,0 +1,121 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * HW Access instrumentation common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_INSTR_H_
+#define _KBASE_HWACCESS_INSTR_H_
+
+#include <mali_kbase_instr_defs.h>
+
+/**
+ * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
+ * @kbdev:	Kbase device
+ * @kctx:	Kbase context
+ * @enable:	HW counter setup parameters
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				struct kbase_ioctl_hwcnt_enable *enable);
+
+/**
+ * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for an ongoing dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU
+ * @kctx:	Kbase context
+ *
+ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true,
+ * of call kbase_instr_hwcnt_wait_for_dump().
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has
+ *				       completed.
+ * @kctx:	Kbase context
+ *
+ * Context: will sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has
+ *				     completed
+ * @kctx:	Kbase context
+ * @success:	Set to true if successful
+ *
+ * Context: does not sleep.
+ *
+ * Return: true if the dump is complete
+ */
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+						bool * const success);
+
+/**
+ * kbase_instr_hwcnt_clear() - Clear HW counters
+ * @kctx:	Kbase context
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_backend_init() - Initialise the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver initialization.
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_backend_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_backend_init() - Terminate the instrumentation backend
+ * @kbdev:	Kbase device
+ *
+ * This function should be called during driver termination.
+ */
+void kbase_instr_backend_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_INSTR_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
new file mode 100644
index 0000000000000..580ac98789592
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
@@ -0,0 +1,378 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_JM_H_
+#define _KBASE_HWACCESS_JM_H_
+
+/**
+ * kbase_backend_run_atom() - Run an atom on the GPU
+ * @kbdev:	Device pointer
+ * @atom:	Atom to run
+ *
+ * Caller must hold the HW access lock
+ */
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_slot_update - Update state based on slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ *
+ * Inspect the jobs in the slot ringbuffers and update state.
+ *
+ * This will cause jobs to be submitted to hardware if they are unblocked
+ */
+void kbase_backend_slot_update(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_find_and_release_free_address_space() - Release a free AS
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ *
+ * This function can evict an idle context from the runpool, freeing up the
+ * address space it was using.
+ *
+ * The address space is marked as in use. The caller must either assign a
+ * context using kbase_gpu_use_ctx(), or release it using
+ * kbase_ctx_sched_release()
+ *
+ * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none
+ *	   available
+ */
+int kbase_backend_find_and_release_free_address_space(
+		struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the
+ *			     provided address space.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @as_nr:	Free address space to use
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * Return: true if successful, false if ASID not assigned.
+ */
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int as_nr);
+
+/**
+ * kbase_backend_use_ctx_sched() - Activate a context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer
+ * @js:         Job slot to activate context on
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * The context must already be scheduled and assigned to an address space. If
+ * the context is not scheduled, then kbase_gpu_use_ctx() should be used
+ * instead.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if context is now active, false otherwise (ie if context does
+ *	   not have an address space assigned)
+ */
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+					struct kbase_context *kctx, int js);
+
+/**
+ * kbase_backend_release_ctx_irq - Release a context from the GPU. This will
+ *                                 de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock
+ */
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+				struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will
+ *                                   de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx:  Context pointer
+ *
+ * Caller must hold kbase_device->mmu_hw_mutex
+ *
+ * This function must perform any operations that could not be performed in IRQ
+ * context by kbase_backend_release_ctx_irq().
+ */
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * kbase_backend_cacheclean - Perform a cache clean if the given atom requires
+ *                            one
+ * @kbdev:	Device pointer
+ * @katom:	Pointer to the failed atom
+ *
+ * On some GPUs, the GPU cache must be cleaned following a failed atom. This
+ * function performs a clean if it is required by @katom.
+ */
+void kbase_backend_cacheclean(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom);
+
+
+/**
+ * kbase_backend_complete_wq() - Perform backend-specific actions required on
+ *				 completing an atom.
+ * @kbdev:	Device pointer
+ * @katom:	Pointer to the atom to complete
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ *
+ * Return: true if atom has completed, false if atom should be re-submitted
+ */
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_complete_wq_post_sched - Perform backend-specific actions
+ *                                        required on completing an atom, after
+ *                                        any scheduling has taken place.
+ * @kbdev:         Device pointer
+ * @core_req:      Core requirements of atom
+ * @coreref_state: Coreref state of atom
+ *
+ * This function should only be called from kbase_jd_done_worker() or
+ * js_return_worker().
+ */
+void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
+		base_jd_core_req core_req,
+		enum kbase_atom_coreref_state coreref_state);
+
+/**
+ * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU
+ *			   and remove any others from the ringbuffers.
+ * @kbdev:		Device pointer
+ * @end_timestamp:	Timestamp of reset
+ */
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
+
+/**
+ * kbase_backend_inspect_tail - Return the atom currently at the tail of slot
+ *                              @js
+ * @kbdev: Device pointer
+ * @js:    Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+					int js);
+
+/**
+ * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a
+ *				      slot.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot
+ */
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot
+ *					that are currently on the GPU.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot @js that are currently on the GPU.
+ */
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs
+ *				       has changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg starting/stopping
+ * scheduling timers).
+ */
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (eg updating timeouts of
+ * currently running atoms).
+ */
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_slot_free() - Return the number of jobs that can be currently
+ *			       submitted to slot @js.
+ * @kbdev:	Device pointer
+ * @js:		Job slot to inspect
+ *
+ * Return : Number of jobs that can be submitted.
+ */
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_jm_kill_jobs_from_kctx - Kill all jobs that are currently
+ *                                        running from a context
+ * @kctx: Context pointer
+ *
+ * This is used in response to a page fault to remove all jobs from the faulting
+ * context from the hardware.
+ */
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx);
+
+/**
+ * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and
+ *                               to be descheduled.
+ * @kctx: Context pointer
+ *
+ * This should be called following kbase_js_zap_context(), to ensure the context
+ * can be safely destroyed.
+ */
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx);
+
+/**
+ * kbase_backend_get_current_flush_id - Return the current flush ID
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: the current flush ID to be recorded for each job chain
+ */
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it returns
+ * true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *                not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_locked - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_silent - Reset the GPU silently
+ * @kbdev: Device pointer
+ *
+ * Reset the GPU without trying to cancel jobs and don't emit messages into
+ * the kernel log while doing the reset.
+ *
+ * This function should be used in cases where we are doing a controlled reset
+ * of the GPU as part of normal processing (e.g. exiting protected mode) where
+ * the driver will have ensured the scheduler has been idled and all other
+ * users of the GPU (e.g. instrumentation) have been suspended.
+ */
+void kbase_reset_gpu_silent(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_active - Reports if the GPU is being reset
+ * @kbdev: Device pointer
+ *
+ * Return: True if the GPU is in the process of being reset.
+ */
+bool kbase_reset_gpu_active(struct kbase_device *kbdev);
+#endif
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx:         The kbase context that contains the job(s) that should
+ *                be hard-stopped
+ * @js:           The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ *                jobs from the context)
+ * Context:
+ *   The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+				struct kbase_jd_atom *target_katom);
+
+/* Object containing callbacks for enabling/disabling protected mode, used
+ * on GPU which supports protected mode switching natively.
+ */
+extern struct protected_mode_ops kbase_native_protected_ops;
+
+#endif /* _KBASE_HWACCESS_JM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
new file mode 100644
index 0000000000000..4598d8099765b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
@@ -0,0 +1,214 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/**
+ * @file mali_kbase_hwaccess_pm.h
+ * HW access power manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_PM_H_
+#define _KBASE_HWACCESS_PM_H_
+
+#include <mali_midg_regmap.h>
+#include <linux/atomic.h>
+
+#include <mali_kbase_pm_defs.h>
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/* Functions common to all HW access backends */
+
+/**
+ * Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return 0 if the power management framework was successfully
+ *         initialized.
+ */
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev);
+
+/**
+ * Terminate the power management framework.
+ *
+ * No power management functions may be called after this (except
+ * @ref kbase_pm_init)
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_hwaccess_pm_powerup - Power up the GPU.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags to pass on to kbase_pm_init_hw
+ *
+ * Power up GPU after all modules have been initialized and interrupt handlers
+ * installed.
+ *
+ * Return: 0 if powerup was successful.
+ */
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ *
+ * Should ensure that no new interrupts are generated, but allow any currently
+ * running interrupt handlers to complete successfully. The GPU is forced off by
+ * the time this function returns, regardless of whether or not the active power
+ * policy asks for the GPU to be powered off.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to suspend the GPU
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to resume the GPU from a suspend
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for activating the GPU. Called when the first
+ * context goes active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for idling the GPU. Called when the last
+ * context goes idle.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ */
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
+
+
+/**
+ * Set the debug core mask.
+ *
+ * This determines which cores the power manager is allowed to use.
+ *
+ * @param kbdev         The kbase device structure for the device (must be a
+ *                      valid pointer)
+ * @param new_core_mask_js0 The core mask to use for job slot 0
+ * @param new_core_mask_js0 The core mask to use for job slot 1
+ * @param new_core_mask_js0 The core mask to use for job slot 2
+ */
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+		u64 new_core_mask_js0, u64 new_core_mask_js1,
+		u64 new_core_mask_js2);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_ca_list_policies)
+ */
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int
+kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ *              pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev  The kbase device structure for the device (must be a valid
+ *               pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ *               @ref kbase_pm_list_policies)
+ */
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+					const struct kbase_pm_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ *                      be NULL. The contents of this array must not be
+ *                      modified.
+ *
+ * @return The number of policies
+ */
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **policies);
+
+#endif /* _KBASE_HWACCESS_PM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
new file mode 100644
index 0000000000000..9b86b51070215
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
@@ -0,0 +1,58 @@
+/*
+ *
+ * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/**
+ *
+ */
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev:		Device pointer
+ * @cycle_counter:	Pointer to u64 to store cycle counter in
+ * @system_time:	Pointer to u64 to store system time in
+ * @ts:			Pointer to struct timespec to store current monotonic
+ *			time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() -  Wait for GPU write flush
+ * @kbdev:	Kbase device
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_device *kbdev);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
new file mode 100644
index 0000000000000..10706b8d25486
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h
@@ -0,0 +1,71 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_HWCNT_READER_H_
+#define _KBASE_HWCNT_READER_H_
+
+/* The ids of ioctl commands. */
+#define KBASE_HWCNT_READER 0xBE
+#define KBASE_HWCNT_READER_GET_HWVER       _IOR(KBASE_HWCNT_READER, 0x00, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32)
+#define KBASE_HWCNT_READER_DUMP            _IOW(KBASE_HWCNT_READER, 0x10, u32)
+#define KBASE_HWCNT_READER_CLEAR           _IOW(KBASE_HWCNT_READER, 0x11, u32)
+#define KBASE_HWCNT_READER_GET_BUFFER      _IOR(KBASE_HWCNT_READER, 0x20,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_PUT_BUFFER      _IOW(KBASE_HWCNT_READER, 0x21,\
+		struct kbase_hwcnt_reader_metadata)
+#define KBASE_HWCNT_READER_SET_INTERVAL    _IOW(KBASE_HWCNT_READER, 0x30, u32)
+#define KBASE_HWCNT_READER_ENABLE_EVENT    _IOW(KBASE_HWCNT_READER, 0x40, u32)
+#define KBASE_HWCNT_READER_DISABLE_EVENT   _IOW(KBASE_HWCNT_READER, 0x41, u32)
+#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32)
+
+/**
+ * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata
+ * @timestamp:  time when sample was collected
+ * @event_id:   id of an event that triggered sample collection
+ * @buffer_idx: position in sampling area where sample buffer was stored
+ */
+struct kbase_hwcnt_reader_metadata {
+	u64 timestamp;
+	u32 event_id;
+	u32 buffer_idx;
+};
+
+/**
+ * enum base_hwcnt_reader_event - hwcnt dumping events
+ * @BASE_HWCNT_READER_EVENT_MANUAL:   manual request for dump
+ * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump
+ * @BASE_HWCNT_READER_EVENT_PREJOB:   prejob dump request
+ * @BASE_HWCNT_READER_EVENT_POSTJOB:  postjob dump request
+ * @BASE_HWCNT_READER_EVENT_COUNT:    number of supported events
+ */
+enum base_hwcnt_reader_event {
+	BASE_HWCNT_READER_EVENT_MANUAL,
+	BASE_HWCNT_READER_EVENT_PERIODIC,
+	BASE_HWCNT_READER_EVENT_PREJOB,
+	BASE_HWCNT_READER_EVENT_POSTJOB,
+
+	BASE_HWCNT_READER_EVENT_COUNT
+};
+
+#endif /* _KBASE_HWCNT_READER_H_ */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_ioctl.h b/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
new file mode 100644
index 0000000000000..bee2f3a172eae
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
@@ -0,0 +1,868 @@
+/*
+ *
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_IOCTL_H_
+#define _KBASE_IOCTL_H_
+
+#ifdef __cpluscplus
+extern "C" {
+#endif
+
+#include <asm-generic/ioctl.h>
+#include <linux/types.h>
+
+#define KBASE_IOCTL_TYPE 0x80
+
+/*
+ * 11.1:
+ * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags
+ * 11.2:
+ * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_SECURE,
+ *   which some user-side clients prior to 11.2 might fault if they received
+ *   them
+ * 11.3:
+ * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and
+ *   KBASE_IOCTL_STICKY_RESOURCE_UNMAP
+ * 11.4:
+ * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET
+ * 11.5:
+ * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD)
+ * 11.6:
+ * - Added flags field to base_jit_alloc_info structure, which can be used to
+ *   specify pseudo chunked tiler alignment for JIT allocations.
+ * 11.7:
+ * - Removed UMP support
+ * 11.8:
+ * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags
+ * 11.9:
+ * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY
+ *   under base_mem_alloc_flags
+ * 11.10:
+ * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for
+ *   JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations
+ *   with one softjob.
+ * 11.11:
+ * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags
+ */
+#define BASE_UK_VERSION_MAJOR 11
+#define BASE_UK_VERSION_MINOR 11
+
+/**
+ * struct kbase_ioctl_version_check - Check version compatibility with kernel
+ *
+ * @major: Major version number
+ * @minor: Minor version number
+ */
+struct kbase_ioctl_version_check {
+	__u16 major;
+	__u16 minor;
+};
+
+#define KBASE_IOCTL_VERSION_CHECK \
+	_IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)
+
+/**
+ * struct kbase_ioctl_set_flags - Set kernel context creation flags
+ *
+ * @create_flags: Flags - see base_context_create_flags
+ */
+struct kbase_ioctl_set_flags {
+	__u32 create_flags;
+};
+
+#define KBASE_IOCTL_SET_FLAGS \
+	_IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags)
+
+/**
+ * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
+ *
+ * @addr: Memory address of an array of struct base_jd_atom_v2
+ * @nr_atoms: Number of entries in the array
+ * @stride: sizeof(struct base_jd_atom_v2)
+ */
+struct kbase_ioctl_job_submit {
+	__u64 addr;
+	__u32 nr_atoms;
+	__u32 stride;
+};
+
+#define KBASE_IOCTL_JOB_SUBMIT \
+	_IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit)
+
+/**
+ * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel
+ *
+ * @buffer: Pointer to the buffer to store properties into
+ * @size: Size of the buffer
+ * @flags: Flags - must be zero for now
+ *
+ * The ioctl will return the number of bytes stored into @buffer or an error
+ * on failure (e.g. @size is too small). If @size is specified as 0 then no
+ * data will be written but the return value will be the number of bytes needed
+ * for all the properties.
+ *
+ * @flags may be used in the future to request a different format for the
+ * buffer. With @flags == 0 the following format is used.
+ *
+ * The buffer will be filled with pairs of values, a u32 key identifying the
+ * property followed by the value. The size of the value is identified using
+ * the bottom bits of the key. The value then immediately followed the key and
+ * is tightly packed (there is no padding). All keys and values are
+ * little-endian.
+ *
+ * 00 = u8
+ * 01 = u16
+ * 10 = u32
+ * 11 = u64
+ */
+struct kbase_ioctl_get_gpuprops {
+	__u64 buffer;
+	__u32 size;
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_GET_GPUPROPS \
+	_IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops)
+
+#define KBASE_IOCTL_POST_TERM \
+	_IO(KBASE_IOCTL_TYPE, 4)
+
+/**
+ * union kbase_ioctl_mem_alloc - Allocate memory on the GPU
+ *
+ * @va_pages: The number of pages of virtual address space to reserve
+ * @commit_pages: The number of physical pages to allocate
+ * @extent: The number of extra pages to allocate on each GPU fault which grows
+ *          the region
+ * @flags: Flags
+ * @gpu_va: The GPU virtual address which is allocated
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_alloc {
+	struct {
+		__u64 va_pages;
+		__u64 commit_pages;
+		__u64 extent;
+		__u64 flags;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALLOC \
+	_IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc)
+
+/**
+ * struct kbase_ioctl_mem_query - Query properties of a GPU memory region
+ * @gpu_addr: A GPU address contained within the region
+ * @query: The type of query
+ * @value: The result of the query
+ *
+ * Use a %KBASE_MEM_QUERY_xxx flag as input for @query.
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_query {
+	struct {
+		__u64 gpu_addr;
+		__u64 query;
+	} in;
+	struct {
+		__u64 value;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_QUERY \
+	_IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query)
+
+#define KBASE_MEM_QUERY_COMMIT_SIZE	((u64)1)
+#define KBASE_MEM_QUERY_VA_SIZE		((u64)2)
+#define KBASE_MEM_QUERY_FLAGS		((u64)3)
+
+/**
+ * struct kbase_ioctl_mem_free - Free a memory region
+ * @gpu_addr: Handle to the region to free
+ */
+struct kbase_ioctl_mem_free {
+	__u64 gpu_addr;
+};
+
+#define KBASE_IOCTL_MEM_FREE \
+	_IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free)
+
+/**
+ * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader
+ * @buffer_count: requested number of dumping buffers
+ * @jm_bm:        counters selection bitmask (JM)
+ * @shader_bm:    counters selection bitmask (Shader)
+ * @tiler_bm:     counters selection bitmask (Tiler)
+ * @mmu_l2_bm:    counters selection bitmask (MMU_L2)
+ *
+ * A fd is returned from the ioctl if successful, or a negative value on error
+ */
+struct kbase_ioctl_hwcnt_reader_setup {
+	__u32 buffer_count;
+	__u32 jm_bm;
+	__u32 shader_bm;
+	__u32 tiler_bm;
+	__u32 mmu_l2_bm;
+};
+
+#define KBASE_IOCTL_HWCNT_READER_SETUP \
+	_IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup)
+
+/**
+ * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection
+ * @dump_buffer:  GPU address to write counters to
+ * @jm_bm:        counters selection bitmask (JM)
+ * @shader_bm:    counters selection bitmask (Shader)
+ * @tiler_bm:     counters selection bitmask (Tiler)
+ * @mmu_l2_bm:    counters selection bitmask (MMU_L2)
+ */
+struct kbase_ioctl_hwcnt_enable {
+	__u64 dump_buffer;
+	__u32 jm_bm;
+	__u32 shader_bm;
+	__u32 tiler_bm;
+	__u32 mmu_l2_bm;
+};
+
+#define KBASE_IOCTL_HWCNT_ENABLE \
+	_IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable)
+
+#define KBASE_IOCTL_HWCNT_DUMP \
+	_IO(KBASE_IOCTL_TYPE, 10)
+
+#define KBASE_IOCTL_HWCNT_CLEAR \
+	_IO(KBASE_IOCTL_TYPE, 11)
+
+/**
+ * struct kbase_ioctl_hwcnt_values - Values to set dummy the dummy counters to.
+ * @data:    Counter samples for the dummy model.
+ * @size:    Size of the counter sample data.
+ * @padding: Padding.
+ */
+struct kbase_ioctl_hwcnt_values {
+	__u64 data;
+	__u32 size;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_HWCNT_SET \
+	_IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values)
+
+/**
+ * struct kbase_ioctl_disjoint_query - Query the disjoint counter
+ * @counter:   A counter of disjoint events in the kernel
+ */
+struct kbase_ioctl_disjoint_query {
+	__u32 counter;
+};
+
+#define KBASE_IOCTL_DISJOINT_QUERY \
+	_IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query)
+
+/**
+ * struct kbase_ioctl_get_ddk_version - Query the kernel version
+ * @version_buffer: Buffer to receive the kernel version string
+ * @size: Size of the buffer
+ * @padding: Padding
+ *
+ * The ioctl will return the number of bytes written into version_buffer
+ * (which includes a NULL byte) or a negative error code
+ *
+ * The ioctl request code has to be _IOW because the data in ioctl struct is
+ * being copied to the kernel, even though the kernel then writes out the
+ * version info to the buffer specified in the ioctl.
+ */
+struct kbase_ioctl_get_ddk_version {
+	__u64 version_buffer;
+	__u32 size;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_GET_DDK_VERSION \
+	_IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version)
+
+/**
+ * struct kbase_ioctl_mem_jit_init_old - Initialise the JIT memory allocator
+ *
+ * @va_pages: Number of VA pages to reserve for JIT
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
+ *
+ * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for
+ * backwards compatibility.
+ */
+struct kbase_ioctl_mem_jit_init_old {
+	__u64 va_pages;
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT_OLD \
+	_IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_old)
+
+/**
+ * struct kbase_ioctl_mem_jit_init - Initialise the JIT memory allocator
+ *
+ * @va_pages: Number of VA pages to reserve for JIT
+ * @max_allocations: Maximum number of concurrent allocations
+ * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
+ * @padding: Currently unused, must be zero
+ *
+ * Note that depending on the VA size of the application and GPU, the value
+ * specified in @va_pages may be ignored.
+ */
+struct kbase_ioctl_mem_jit_init {
+	__u64 va_pages;
+	__u8 max_allocations;
+	__u8 trim_level;
+	__u8 padding[6];
+};
+
+#define KBASE_IOCTL_MEM_JIT_INIT \
+	_IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init)
+
+/**
+ * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory
+ *
+ * @handle: GPU memory handle (GPU VA)
+ * @user_addr: The address where it is mapped in user space
+ * @size: The number of bytes to synchronise
+ * @type: The direction to synchronise: 0 is sync to memory (clean),
+ * 1 is sync from memory (invalidate). Use the BASE_SYNCSET_OP_xxx constants.
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_mem_sync {
+	__u64 handle;
+	__u64 user_addr;
+	__u64 size;
+	__u8 type;
+	__u8 padding[7];
+};
+
+#define KBASE_IOCTL_MEM_SYNC \
+	_IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync)
+
+/**
+ * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer
+ *
+ * @gpu_addr: The GPU address of the memory region
+ * @cpu_addr: The CPU address to locate
+ * @size: A size in bytes to validate is contained within the region
+ * @offset: The offset from the start of the memory region to @cpu_addr
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_find_cpu_offset {
+	struct {
+		__u64 gpu_addr;
+		__u64 cpu_addr;
+		__u64 size;
+	} in;
+	struct {
+		__u64 offset;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \
+	_IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset)
+
+/**
+ * struct kbase_ioctl_get_context_id - Get the kernel context ID
+ *
+ * @id: The kernel context ID
+ */
+struct kbase_ioctl_get_context_id {
+	__u32 id;
+};
+
+#define KBASE_IOCTL_GET_CONTEXT_ID \
+	_IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id)
+
+/**
+ * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd
+ *
+ * @flags: Flags
+ *
+ * The ioctl returns a file descriptor when successful
+ */
+struct kbase_ioctl_tlstream_acquire {
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_TLSTREAM_ACQUIRE \
+	_IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire)
+
+#define KBASE_IOCTL_TLSTREAM_FLUSH \
+	_IO(KBASE_IOCTL_TYPE, 19)
+
+/**
+ * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region
+ *
+ * @gpu_addr: The memory region to modify
+ * @pages:    The number of physical pages that should be present
+ *
+ * The ioctl may return on the following error codes or 0 for success:
+ *   -ENOMEM: Out of memory
+ *   -EINVAL: Invalid arguments
+ */
+struct kbase_ioctl_mem_commit {
+	__u64 gpu_addr;
+	__u64 pages;
+};
+
+#define KBASE_IOCTL_MEM_COMMIT \
+	_IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit)
+
+/**
+ * union kbase_ioctl_mem_alias - Create an alias of memory regions
+ * @flags: Flags, see BASE_MEM_xxx
+ * @stride: Bytes between start of each memory region
+ * @nents: The number of regions to pack together into the alias
+ * @aliasing_info: Pointer to an array of struct base_mem_aliasing_info
+ * @gpu_va: Address of the new alias
+ * @va_pages: Size of the new alias
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_alias {
+	struct {
+		__u64 flags;
+		__u64 stride;
+		__u64 nents;
+		__u64 aliasing_info;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+		__u64 va_pages;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_ALIAS \
+	_IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias)
+
+/**
+ * union kbase_ioctl_mem_import - Import memory for use by the GPU
+ * @flags: Flags, see BASE_MEM_xxx
+ * @phandle: Handle to the external memory
+ * @type: Type of external memory, see base_mem_import_type
+ * @padding: Amount of extra VA pages to append to the imported buffer
+ * @gpu_va: Address of the new alias
+ * @va_pages: Size of the new alias
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_import {
+	struct {
+		__u64 flags;
+		__u64 phandle;
+		__u32 type;
+		__u32 padding;
+	} in;
+	struct {
+		__u64 flags;
+		__u64 gpu_va;
+		__u64 va_pages;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_IMPORT \
+	_IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import)
+
+/**
+ * struct kbase_ioctl_mem_flags_change - Change the flags for a memory region
+ * @gpu_va: The GPU region to modify
+ * @flags: The new flags to set
+ * @mask: Mask of the flags to modify
+ */
+struct kbase_ioctl_mem_flags_change {
+	__u64 gpu_va;
+	__u64 flags;
+	__u64 mask;
+};
+
+#define KBASE_IOCTL_MEM_FLAGS_CHANGE \
+	_IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change)
+
+/**
+ * struct kbase_ioctl_stream_create - Create a synchronisation stream
+ * @name: A name to identify this stream. Must be NULL-terminated.
+ *
+ * Note that this is also called a "timeline", but is named stream to avoid
+ * confusion with other uses of the word.
+ *
+ * Unused bytes in @name (after the first NULL byte) must be also be NULL bytes.
+ *
+ * The ioctl returns a file descriptor.
+ */
+struct kbase_ioctl_stream_create {
+	char name[32];
+};
+
+#define KBASE_IOCTL_STREAM_CREATE \
+	_IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create)
+
+/**
+ * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence
+ * @fd: The file descriptor to validate
+ */
+struct kbase_ioctl_fence_validate {
+	int fd;
+};
+
+#define KBASE_IOCTL_FENCE_VALIDATE \
+	_IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate)
+
+/**
+ * struct kbase_ioctl_get_profiling_controls - Get the profiling controls
+ * @count: The size of @buffer in u32 words
+ * @buffer: The buffer to receive the profiling controls
+ * @padding: Padding
+ */
+struct kbase_ioctl_get_profiling_controls {
+	__u64 buffer;
+	__u32 count;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_GET_PROFILING_CONTROLS \
+	_IOW(KBASE_IOCTL_TYPE, 26, struct kbase_ioctl_get_profiling_controls)
+
+/**
+ * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel
+ * @buffer: Pointer to the information
+ * @len: Length
+ * @padding: Padding
+ *
+ * The data provided is accessible through a debugfs file
+ */
+struct kbase_ioctl_mem_profile_add {
+	__u64 buffer;
+	__u32 len;
+	__u32 padding;
+};
+
+#define KBASE_IOCTL_MEM_PROFILE_ADD \
+	_IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add)
+
+/**
+ * struct kbase_ioctl_soft_event_update - Update the status of a soft-event
+ * @event: GPU address of the event which has been updated
+ * @new_status: The new status to set
+ * @flags: Flags for future expansion
+ */
+struct kbase_ioctl_soft_event_update {
+	__u64 event;
+	__u32 new_status;
+	__u32 flags;
+};
+
+#define KBASE_IOCTL_SOFT_EVENT_UPDATE \
+	_IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update)
+
+/**
+ * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource
+ * @count: Number of resources
+ * @address: Array of u64 GPU addresses of the external resources to map
+ */
+struct kbase_ioctl_sticky_resource_map {
+	__u64 count;
+	__u64 address;
+};
+
+#define KBASE_IOCTL_STICKY_RESOURCE_MAP \
+	_IOW(KBASE_IOCTL_TYPE, 29, struct kbase_ioctl_sticky_resource_map)
+
+/**
+ * struct kbase_ioctl_sticky_resource_map - Unmap a resource mapped which was
+ *                                          previously permanently mapped
+ * @count: Number of resources
+ * @address: Array of u64 GPU addresses of the external resources to unmap
+ */
+struct kbase_ioctl_sticky_resource_unmap {
+	__u64 count;
+	__u64 address;
+};
+
+#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \
+	_IOW(KBASE_IOCTL_TYPE, 30, struct kbase_ioctl_sticky_resource_unmap)
+
+/**
+ * union kbase_ioctl_mem_find_gpu_start_and_offset - Find the start address of
+ *                                                   the GPU memory region for
+ *                                                   the given gpu address and
+ *                                                   the offset of that address
+ *                                                   into the region
+ *
+ * @gpu_addr: GPU virtual address
+ * @size: Size in bytes within the region
+ * @start: Address of the beginning of the memory region enclosing @gpu_addr
+ *         for the length of @offset bytes
+ * @offset: The offset from the start of the memory region to @gpu_addr
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_mem_find_gpu_start_and_offset {
+	struct {
+		__u64 gpu_addr;
+		__u64 size;
+	} in;
+	struct {
+		__u64 start;
+		__u64 offset;
+	} out;
+};
+
+#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \
+	_IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset)
+
+
+#define KBASE_IOCTL_CINSTR_GWT_START \
+	_IO(KBASE_IOCTL_TYPE, 33)
+
+#define KBASE_IOCTL_CINSTR_GWT_STOP \
+	_IO(KBASE_IOCTL_TYPE, 34)
+
+/**
+ * union kbase_ioctl_gwt_dump - Used to collect all GPU write fault addresses.
+ * @addr_buffer: Address of buffer to hold addresses of gpu modified areas.
+ * @size_buffer: Address of buffer to hold size of modified areas (in pages)
+ * @len: Number of addresses the buffers can hold.
+ * @more_data_available: Status indicating if more addresses are available.
+ * @no_of_addr_collected: Number of addresses collected into addr_buffer.
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ *
+ * This structure is used when performing a call to dump GPU write fault
+ * addresses.
+ */
+union kbase_ioctl_cinstr_gwt_dump {
+	struct {
+		__u64 addr_buffer;
+		__u64 size_buffer;
+		__u32 len;
+		__u32 padding;
+
+	} in;
+	struct {
+		__u32 no_of_addr_collected;
+		__u8 more_data_available;
+		__u8 padding[27];
+	} out;
+};
+
+#define KBASE_IOCTL_CINSTR_GWT_DUMP \
+	_IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump)
+
+
+/***************
+ * test ioctls *
+ ***************/
+#if MALI_UNIT_TEST
+/* These ioctls are purely for test purposes and are not used in the production
+ * driver, they therefore may change without notice
+ */
+
+#define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1)
+
+/**
+ * struct kbase_ioctl_tlstream_test - Start a timeline stream test
+ *
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay between tracepoints from one writer in milliseconds
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ */
+struct kbase_ioctl_tlstream_test {
+	__u32 tpw_count;
+	__u32 msg_delay;
+	__u32 msg_count;
+	__u32 aux_msg;
+};
+
+#define KBASE_IOCTL_TLSTREAM_TEST \
+	_IOW(KBASE_IOCTL_TEST_TYPE, 1, struct kbase_ioctl_tlstream_test)
+
+/**
+ * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes
+ * @bytes_collected: number of bytes read by user
+ * @bytes_generated: number of bytes generated by tracepoints
+ */
+struct kbase_ioctl_tlstream_stats {
+	__u32 bytes_collected;
+	__u32 bytes_generated;
+};
+
+#define KBASE_IOCTL_TLSTREAM_STATS \
+	_IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats)
+
+/**
+ * struct kbase_ioctl_cs_event_memory_write - Write an event memory address
+ * @cpu_addr: Memory address to write
+ * @value: Value to write
+ * @padding: Currently unused, must be zero
+ */
+struct kbase_ioctl_cs_event_memory_write {
+	__u64 cpu_addr;
+	__u8 value;
+	__u8 padding[7];
+};
+
+/**
+ * union kbase_ioctl_cs_event_memory_read - Read an event memory address
+ * @cpu_addr: Memory address to read
+ * @value: Value read
+ * @padding: Currently unused, must be zero
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_cs_event_memory_read {
+	struct {
+		__u64 cpu_addr;
+	} in;
+	struct {
+		__u8 value;
+		__u8 padding[7];
+	} out;
+};
+
+#endif
+
+/**********************************
+ * Definitions for GPU properties *
+ **********************************/
+#define KBASE_GPUPROP_VALUE_SIZE_U8	(0x0)
+#define KBASE_GPUPROP_VALUE_SIZE_U16	(0x1)
+#define KBASE_GPUPROP_VALUE_SIZE_U32	(0x2)
+#define KBASE_GPUPROP_VALUE_SIZE_U64	(0x3)
+
+#define KBASE_GPUPROP_PRODUCT_ID			1
+#define KBASE_GPUPROP_VERSION_STATUS			2
+#define KBASE_GPUPROP_MINOR_REVISION			3
+#define KBASE_GPUPROP_MAJOR_REVISION			4
+/* 5 previously used for GPU speed */
+#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX			6
+/* 7 previously used for minimum GPU speed */
+#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE		8
+#define KBASE_GPUPROP_TEXTURE_FEATURES_0		9
+#define KBASE_GPUPROP_TEXTURE_FEATURES_1		10
+#define KBASE_GPUPROP_TEXTURE_FEATURES_2		11
+#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE		12
+
+#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE			13
+#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE		14
+#define KBASE_GPUPROP_L2_NUM_L2_SLICES			15
+
+#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES		16
+#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS		17
+
+#define KBASE_GPUPROP_MAX_THREADS			18
+#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE		19
+#define KBASE_GPUPROP_MAX_BARRIER_SIZE			20
+#define KBASE_GPUPROP_MAX_REGISTERS			21
+#define KBASE_GPUPROP_MAX_TASK_QUEUE			22
+#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT		23
+#define KBASE_GPUPROP_IMPL_TECH				24
+
+#define KBASE_GPUPROP_RAW_SHADER_PRESENT		25
+#define KBASE_GPUPROP_RAW_TILER_PRESENT			26
+#define KBASE_GPUPROP_RAW_L2_PRESENT			27
+#define KBASE_GPUPROP_RAW_STACK_PRESENT			28
+#define KBASE_GPUPROP_RAW_L2_FEATURES			29
+#define KBASE_GPUPROP_RAW_CORE_FEATURES			30
+#define KBASE_GPUPROP_RAW_MEM_FEATURES			31
+#define KBASE_GPUPROP_RAW_MMU_FEATURES			32
+#define KBASE_GPUPROP_RAW_AS_PRESENT			33
+#define KBASE_GPUPROP_RAW_JS_PRESENT			34
+#define KBASE_GPUPROP_RAW_JS_FEATURES_0			35
+#define KBASE_GPUPROP_RAW_JS_FEATURES_1			36
+#define KBASE_GPUPROP_RAW_JS_FEATURES_2			37
+#define KBASE_GPUPROP_RAW_JS_FEATURES_3			38
+#define KBASE_GPUPROP_RAW_JS_FEATURES_4			39
+#define KBASE_GPUPROP_RAW_JS_FEATURES_5			40
+#define KBASE_GPUPROP_RAW_JS_FEATURES_6			41
+#define KBASE_GPUPROP_RAW_JS_FEATURES_7			42
+#define KBASE_GPUPROP_RAW_JS_FEATURES_8			43
+#define KBASE_GPUPROP_RAW_JS_FEATURES_9			44
+#define KBASE_GPUPROP_RAW_JS_FEATURES_10		45
+#define KBASE_GPUPROP_RAW_JS_FEATURES_11		46
+#define KBASE_GPUPROP_RAW_JS_FEATURES_12		47
+#define KBASE_GPUPROP_RAW_JS_FEATURES_13		48
+#define KBASE_GPUPROP_RAW_JS_FEATURES_14		49
+#define KBASE_GPUPROP_RAW_JS_FEATURES_15		50
+#define KBASE_GPUPROP_RAW_TILER_FEATURES		51
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0		52
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1		53
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2		54
+#define KBASE_GPUPROP_RAW_GPU_ID			55
+#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS		56
+#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE	57
+#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE	58
+#define KBASE_GPUPROP_RAW_THREAD_FEATURES		59
+#define KBASE_GPUPROP_RAW_COHERENCY_MODE		60
+
+#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS		61
+#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS		62
+#define KBASE_GPUPROP_COHERENCY_COHERENCY		63
+#define KBASE_GPUPROP_COHERENCY_GROUP_0			64
+#define KBASE_GPUPROP_COHERENCY_GROUP_1			65
+#define KBASE_GPUPROP_COHERENCY_GROUP_2			66
+#define KBASE_GPUPROP_COHERENCY_GROUP_3			67
+#define KBASE_GPUPROP_COHERENCY_GROUP_4			68
+#define KBASE_GPUPROP_COHERENCY_GROUP_5			69
+#define KBASE_GPUPROP_COHERENCY_GROUP_6			70
+#define KBASE_GPUPROP_COHERENCY_GROUP_7			71
+#define KBASE_GPUPROP_COHERENCY_GROUP_8			72
+#define KBASE_GPUPROP_COHERENCY_GROUP_9			73
+#define KBASE_GPUPROP_COHERENCY_GROUP_10		74
+#define KBASE_GPUPROP_COHERENCY_GROUP_11		75
+#define KBASE_GPUPROP_COHERENCY_GROUP_12		76
+#define KBASE_GPUPROP_COHERENCY_GROUP_13		77
+#define KBASE_GPUPROP_COHERENCY_GROUP_14		78
+#define KBASE_GPUPROP_COHERENCY_GROUP_15		79
+
+#define KBASE_GPUPROP_TEXTURE_FEATURES_3		80
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3		81
+
+#define KBASE_GPUPROP_NUM_EXEC_ENGINES                  82
+
+#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC		83
+#define KBASE_GPUPROP_TLS_ALLOC				84
+
+#ifdef __cpluscplus
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c
new file mode 100644
index 0000000000000..887437a2e76b9
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -0,0 +1,1643 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <mali_kbase.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/ratelimit.h>
+
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_tlstream.h>
+
+#include "mali_kbase_dma_fence.h"
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+/* Return whether katom will run on the GPU or not. Currently only soft jobs and
+ * dependency-only atoms do not run on the GPU */
+#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) ||  \
+			((katom->core_req & BASE_JD_REQ_ATOM_TYPE) ==    \
+							BASE_JD_REQ_DEP)))
+/*
+ * This is the kernel side of the API. Only entry points are:
+ * - kbase_jd_submit(): Called from userspace to submit a single bag
+ * - kbase_jd_done(): Called from interrupt context to track the
+ *   completion of a job.
+ * Callouts:
+ * - to the job manager (enqueue a job)
+ * - to the event subsystem (signals the completion/failure of bag/job-chains).
+ */
+
+static void __user *
+get_compat_pointer(struct kbase_context *kctx, const u64 p)
+{
+#ifdef CONFIG_COMPAT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return compat_ptr(p);
+#endif
+	return u64_to_user_ptr(p);
+}
+
+/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs
+ *
+ * Returns whether the JS needs a reschedule.
+ *
+ * Note that the caller must also check the atom status and
+ * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock
+ */
+static int jd_run_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+	if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) {
+		/* Dependency only atom */
+		katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		return 0;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		/* Soft-job */
+		if (katom->will_fail_event_code) {
+			kbase_finish_soft_job(katom);
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			return 0;
+		}
+		if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+			if (!kbase_replay_process(katom))
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		} else if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		}
+		return 0;
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+	/* Queue an action about whether we should try scheduling a context */
+	return kbasep_js_add_job(kctx, katom);
+}
+
+#if defined(CONFIG_MALI_DMA_FENCE)
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kbdev = katom->kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Check whether the atom's other dependencies were already met. If
+	 * katom is a GPU atom then the job scheduler may be able to represent
+	 * the dependencies, hence we may attempt to submit it before they are
+	 * met. Other atoms must have had both dependencies resolved.
+	 */
+	if (IS_GPU_ATOM(katom) ||
+			(!kbase_jd_katom_dep_atom(&katom->dep[0]) &&
+			!kbase_jd_katom_dep_atom(&katom->dep[1]))) {
+		/* katom dep complete, attempt to run it */
+		bool resched = false;
+
+		resched = jd_run_atom(katom);
+
+		if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+			/* The atom has already finished */
+			resched |= jd_done_nolock(katom, NULL);
+		}
+
+		if (resched)
+			kbase_js_sched_all(kbdev);
+	}
+}
+#endif
+
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
+{
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 * Any successfully completed atom would have had all it's callbacks
+	 * completed before the atom was run, so only flush for failed atoms.
+	 */
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		flush_workqueue(katom->kctx->dma_fence.wq);
+#endif /* CONFIG_MALI_DMA_FENCE */
+}
+
+static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
+{
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_signal(katom);
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	kbase_gpu_vm_lock(katom->kctx);
+	/* only roll back if extres is non-NULL */
+	if (katom->extres) {
+		u32 res_no;
+
+		res_no = katom->nr_extres;
+		while (res_no-- > 0) {
+			struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+			struct kbase_va_region *reg;
+
+			reg = kbase_region_tracker_find_region_base_address(
+					katom->kctx,
+					katom->extres[res_no].gpu_address);
+			kbase_unmap_external_resource(katom->kctx, reg, alloc);
+		}
+		kfree(katom->extres);
+		katom->extres = NULL;
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+}
+
+/*
+ * Set up external resources needed by this job.
+ *
+ * jctx.lock must be held when this is called.
+ */
+
+static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom)
+{
+	int err_ret_val = -EINVAL;
+	u32 res_no;
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct kbase_dma_fence_resv_info info = {
+		.dma_fence_resv_count = 0,
+	};
+#ifdef CONFIG_SYNC
+	/*
+	 * When both dma-buf fence and Android native sync is enabled, we
+	 * disable dma-buf fence for contexts that are using Android native
+	 * fences.
+	 */
+	const bool implicit_sync = !kbase_ctx_flag(katom->kctx,
+						   KCTX_NO_IMPLICIT_SYNC);
+#else /* CONFIG_SYNC */
+	const bool implicit_sync = true;
+#endif /* CONFIG_SYNC */
+#endif /* CONFIG_MALI_DMA_FENCE */
+	struct base_external_resource *input_extres;
+
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+	/* no resources encoded, early out */
+	if (!katom->nr_extres)
+		return -EINVAL;
+
+	katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL);
+	if (NULL == katom->extres) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	/* copy user buffer to the end of our real buffer.
+	 * Make sure the struct sizes haven't changed in a way
+	 * we don't support */
+	BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres));
+	input_extres = (struct base_external_resource *)
+			(((unsigned char *)katom->extres) +
+			(sizeof(*katom->extres) - sizeof(*input_extres)) *
+			katom->nr_extres);
+
+	if (copy_from_user(input_extres,
+			get_compat_pointer(katom->kctx, user_atom->extres_list),
+			sizeof(*input_extres) * katom->nr_extres) != 0) {
+		err_ret_val = -EINVAL;
+		goto early_err_out;
+	}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		info.resv_objs = kmalloc_array(katom->nr_extres,
+					sizeof(struct dma_resv *),
+					GFP_KERNEL);
+		if (!info.resv_objs) {
+			err_ret_val = -ENOMEM;
+			goto early_err_out;
+		}
+
+		info.dma_fence_excl_bitmap =
+				kcalloc(BITS_TO_LONGS(katom->nr_extres),
+					sizeof(unsigned long), GFP_KERNEL);
+		if (!info.dma_fence_excl_bitmap) {
+			err_ret_val = -ENOMEM;
+			goto early_err_out;
+		}
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* Take the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* need to keep the GPU VM locked while we set up UMM buffers */
+	kbase_gpu_vm_lock(katom->kctx);
+	for (res_no = 0; res_no < katom->nr_extres; res_no++) {
+		struct base_external_resource *res;
+		struct kbase_va_region *reg;
+		struct kbase_mem_phy_alloc *alloc;
+		bool exclusive;
+
+		res = &input_extres[res_no];
+		exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE)
+				? true : false;
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx,
+				res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+		/* did we find a matching region object? */
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE)) {
+			/* roll back */
+			goto failed_loop;
+		}
+
+		if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) &&
+				(reg->flags & KBASE_REG_SECURE)) {
+			katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED;
+		}
+
+		alloc = kbase_map_external_resource(katom->kctx, reg,
+				current->mm);
+		if (!alloc) {
+			err_ret_val = -EINVAL;
+			goto failed_loop;
+		}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+		if (implicit_sync &&
+		    reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+			struct dma_resv *resv;
+
+			resv = reg->gpu_alloc->imported.umm.dma_buf->resv;
+			if (resv)
+				kbase_dma_fence_add_reservation(resv, &info,
+								exclusive);
+		}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+		/* finish with updating out array with the data we found */
+		/* NOTE: It is important that this is the last thing we do (or
+		 * at least not before the first write) as we overwrite elements
+		 * as we loop and could be overwriting ourself, so no writes
+		 * until the last read for an element.
+		 * */
+		katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */
+		katom->extres[res_no].alloc = alloc;
+	}
+	/* successfully parsed the extres array */
+	/* drop the vm lock now */
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		if (info.dma_fence_resv_count) {
+			int ret;
+
+			ret = kbase_dma_fence_wait(katom, &info);
+			if (ret < 0)
+				goto failed_dma_fence_setup;
+		}
+
+		kfree(info.resv_objs);
+		kfree(info.dma_fence_excl_bitmap);
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* all done OK */
+	return 0;
+
+/* error handling section */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+failed_dma_fence_setup:
+	/* Lock the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* lock before we unmap */
+	kbase_gpu_vm_lock(katom->kctx);
+#endif
+
+ failed_loop:
+	/* undo the loop work */
+	while (res_no-- > 0) {
+		struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+
+		kbase_unmap_external_resource(katom->kctx, NULL, alloc);
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+ early_err_out:
+	kfree(katom->extres);
+	katom->extres = NULL;
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		kfree(info.resv_objs);
+		kfree(info.dma_fence_excl_bitmap);
+	}
+#endif
+	return err_ret_val;
+}
+
+static inline void jd_resolve_dep(struct list_head *out_list,
+					struct kbase_jd_atom *katom,
+					u8 d, bool ctx_is_dying)
+{
+	u8 other_d = !d;
+
+	while (!list_empty(&katom->dep_head[d])) {
+		struct kbase_jd_atom *dep_atom;
+		struct kbase_jd_atom *other_dep_atom;
+		u8 dep_type;
+
+		dep_atom = list_entry(katom->dep_head[d].next,
+				struct kbase_jd_atom, dep_item[d]);
+		list_del(katom->dep_head[d].next);
+
+		dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]);
+		kbase_jd_katom_dep_clear(&dep_atom->dep[d]);
+
+		if (katom->event_code != BASE_JD_EVENT_DONE &&
+			(dep_type != BASE_JD_DEP_TYPE_ORDER)) {
+#ifdef CONFIG_MALI_DMA_FENCE
+			kbase_dma_fence_cancel_callbacks(dep_atom);
+#endif
+
+			dep_atom->event_code = katom->event_code;
+			KBASE_DEBUG_ASSERT(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED);
+
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					!= BASE_JD_REQ_SOFT_REPLAY) {
+				dep_atom->will_fail_event_code =
+					dep_atom->event_code;
+			} else {
+				dep_atom->status =
+					KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+		}
+		other_dep_atom = (struct kbase_jd_atom *)
+			kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]);
+
+		if (!dep_atom->in_jd_list && (!other_dep_atom ||
+				(IS_GPU_ATOM(dep_atom) && !ctx_is_dying &&
+				!dep_atom->will_fail_event_code &&
+				!other_dep_atom->will_fail_event_code))) {
+			bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+			int dep_count;
+
+			dep_count = kbase_fence_dep_count_read(dep_atom);
+			if (likely(dep_count == -1)) {
+				dep_satisfied = true;
+			} else {
+				/*
+				 * There are either still active callbacks, or
+				 * all fences for this @dep_atom has signaled,
+				 * but the worker that will queue the atom has
+				 * not yet run.
+				 *
+				 * Wait for the fences to signal and the fence
+				 * worker to run and handle @dep_atom. If
+				 * @dep_atom was completed due to error on
+				 * @katom, then the fence worker will pick up
+				 * the complete status and error code set on
+				 * @dep_atom above.
+				 */
+				dep_satisfied = false;
+			}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+			if (dep_satisfied) {
+				dep_atom->in_jd_list = true;
+				list_add_tail(&dep_atom->jd_item, out_list);
+			}
+		}
+	}
+}
+
+KBASE_EXPORT_TEST_API(jd_resolve_dep);
+
+#if MALI_CUSTOMER_RELEASE == 0
+static void jd_force_failure(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	kbdev->force_replay_count++;
+
+	if (kbdev->force_replay_count >= kbdev->force_replay_limit) {
+		kbdev->force_replay_count = 0;
+		katom->event_code = BASE_JD_EVENT_FORCE_REPLAY;
+
+		if (kbdev->force_replay_random)
+			kbdev->force_replay_limit =
+			   (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1;
+
+		dev_info(kbdev->dev, "force_replay : promoting to error\n");
+	}
+}
+
+/** Test to see if atom should be forced to fail.
+ *
+ * This function will check if an atom has a replay job as a dependent. If so
+ * then it will be considered for forced failure. */
+static void jd_check_force_failure(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int i;
+
+	if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) ||
+	    (katom->core_req & BASEP_JD_REQ_EVENT_NEVER))
+		return;
+
+	for (i = 1; i < BASE_JD_ATOM_COUNT; i++) {
+		if (kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[0]) == katom ||
+		    kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) {
+			struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i];
+
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ==
+						     BASE_JD_REQ_SOFT_REPLAY &&
+			    (dep_atom->core_req & kbdev->force_replay_core_req)
+					     == kbdev->force_replay_core_req) {
+				jd_force_failure(kbdev, katom);
+				return;
+			}
+		}
+	}
+}
+#endif
+
+/**
+ * is_dep_valid - Validate that a dependency is valid for early dependency
+ *                submission
+ * @katom: Dependency atom to validate
+ *
+ * A dependency is valid if any of the following are true :
+ * - It does not exist (a non-existent dependency does not block submission)
+ * - It is in the job scheduler
+ * - It has completed, does not have a failure event code, and has not been
+ *   marked to fail in the future
+ *
+ * Return: true if valid, false otherwise
+ */
+static bool is_dep_valid(struct kbase_jd_atom *katom)
+{
+	/* If there's no dependency then this is 'valid' from the perspective of
+	 * early dependency submission */
+	if (!katom)
+		return true;
+
+	/* Dependency must have reached the job scheduler */
+	if (katom->status < KBASE_JD_ATOM_STATE_IN_JS)
+		return false;
+
+	/* If dependency has completed and has failed or will fail then it is
+	 * not valid */
+	if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED &&
+			(katom->event_code != BASE_JD_EVENT_DONE ||
+			katom->will_fail_event_code))
+		return false;
+
+	return true;
+}
+
+static void jd_try_submitting_deps(struct list_head *out_list,
+		struct kbase_jd_atom *node)
+{
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct list_head *pos;
+
+		list_for_each(pos, &node->dep_head[i]) {
+			struct kbase_jd_atom *dep_atom = list_entry(pos,
+					struct kbase_jd_atom, dep_item[i]);
+
+			if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) {
+				/*Check if atom deps look sane*/
+				bool dep0_valid = is_dep_valid(
+						dep_atom->dep[0].atom);
+				bool dep1_valid = is_dep_valid(
+						dep_atom->dep[1].atom);
+				bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+				int dep_count;
+
+				dep_count = kbase_fence_dep_count_read(
+								dep_atom);
+				if (likely(dep_count == -1)) {
+					dep_satisfied = true;
+				} else {
+				/*
+				 * There are either still active callbacks, or
+				 * all fences for this @dep_atom has signaled,
+				 * but the worker that will queue the atom has
+				 * not yet run.
+				 *
+				 * Wait for the fences to signal and the fence
+				 * worker to run and handle @dep_atom. If
+				 * @dep_atom was completed due to error on
+				 * @katom, then the fence worker will pick up
+				 * the complete status and error code set on
+				 * @dep_atom above.
+				 */
+					dep_satisfied = false;
+				}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+				if (dep0_valid && dep1_valid && dep_satisfied) {
+					dep_atom->in_jd_list = true;
+					list_add(&dep_atom->jd_item, out_list);
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Perform the necessary handling of an atom that has finished running
+ * on the GPU.
+ *
+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
+ * is responsible for calling kbase_finish_soft_job *before* calling this function.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */
+bool jd_done_nolock(struct kbase_jd_atom *katom,
+		struct list_head *completed_jobs_ctx)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head completed_jobs;
+	struct list_head runnable_jobs;
+	bool need_to_try_schedule_context = false;
+	int i;
+
+	INIT_LIST_HEAD(&completed_jobs);
+	INIT_LIST_HEAD(&runnable_jobs);
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+#if MALI_CUSTOMER_RELEASE == 0
+	jd_check_force_failure(katom);
+#endif
+
+	/* This is needed in case an atom is failed due to being invalid, this
+	 * can happen *before* the jobs that the atom depends on have completed */
+	for (i = 0; i < 2; i++) {
+		if (kbase_jd_katom_dep_atom(&katom->dep[i])) {
+			list_del(&katom->dep_item[i]);
+			kbase_jd_katom_dep_clear(&katom->dep[i]);
+		}
+	}
+
+	/* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with
+	 * BASE_JD_EVENT_TILE_RANGE_FAULT.
+	 *
+	 * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the
+	 * error code to BASE_JD_EVENT_DONE
+	 */
+
+	if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) &&
+		  katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) {
+		if ((katom->core_req & BASE_JD_REQ_FS) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED)) {
+			/* Promote the failure to job done */
+			katom->event_code = BASE_JD_EVENT_DONE;
+			katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED);
+		}
+	}
+
+	katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+	list_add_tail(&katom->jd_item, &completed_jobs);
+
+	while (!list_empty(&completed_jobs)) {
+		katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item);
+		list_del(completed_jobs.prev);
+		KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+		for (i = 0; i < 2; i++)
+			jd_resolve_dep(&runnable_jobs, katom, i,
+					kbase_ctx_flag(kctx, KCTX_DYING));
+
+		if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+			kbase_jd_post_external_resources(katom);
+
+		while (!list_empty(&runnable_jobs)) {
+			struct kbase_jd_atom *node;
+
+			node = list_entry(runnable_jobs.next,
+					struct kbase_jd_atom, jd_item);
+			list_del(runnable_jobs.next);
+			node->in_jd_list = false;
+
+			KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+			if (node->status != KBASE_JD_ATOM_STATE_COMPLETED &&
+					!kbase_ctx_flag(kctx, KCTX_DYING)) {
+				need_to_try_schedule_context |= jd_run_atom(node);
+			} else {
+				node->event_code = katom->event_code;
+
+				if ((node->core_req &
+					BASE_JD_REQ_SOFT_JOB_TYPE) ==
+					BASE_JD_REQ_SOFT_REPLAY) {
+					if (kbase_replay_process(node))
+						/* Don't complete this atom */
+						continue;
+				} else if (node->core_req &
+							BASE_JD_REQ_SOFT_JOB) {
+					/* If this is a fence wait soft job
+					 * then remove it from the list of sync
+					 * waiters.
+					 */
+					if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req)
+						kbasep_remove_waiting_soft_job(node);
+
+					kbase_finish_soft_job(node);
+				}
+				node->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+
+			if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+				list_add_tail(&node->jd_item, &completed_jobs);
+			} else if (node->status == KBASE_JD_ATOM_STATE_IN_JS &&
+					!node->will_fail_event_code) {
+				/* Node successfully submitted, try submitting
+				 * dependencies as they may now be representable
+				 * in JS */
+				jd_try_submitting_deps(&runnable_jobs, node);
+			}
+		}
+
+		/* Register a completed job as a disjoint event when the GPU
+		 * is in a disjoint state (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kctx->kbdev);
+		if (completed_jobs_ctx)
+			list_add_tail(&katom->jd_item, completed_jobs_ctx);
+		else
+			kbase_event_post(kctx, katom);
+
+		/* Decrement and check the TOTAL number of jobs. This includes
+		 * those not tracked by the scheduler: 'not ready to run' and
+		 * 'dependency-only' jobs. */
+		if (--kctx->jctx.job_nr == 0)
+			wake_up(&kctx->jctx.zero_jobs_wait);	/* All events are safely queued now, and we can signal any waiter
+								 * that we've got no more jobs (so we can be safely terminated) */
+	}
+
+	return need_to_try_schedule_context;
+}
+
+KBASE_EXPORT_TEST_API(jd_done_nolock);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+enum {
+	CORE_REQ_DEP_ONLY,
+	CORE_REQ_SOFT,
+	CORE_REQ_COMPUTE,
+	CORE_REQ_FRAGMENT,
+	CORE_REQ_VERTEX,
+	CORE_REQ_TILER,
+	CORE_REQ_FRAGMENT_VERTEX,
+	CORE_REQ_FRAGMENT_VERTEX_TILER,
+	CORE_REQ_FRAGMENT_TILER,
+	CORE_REQ_VERTEX_TILER,
+	CORE_REQ_UNKNOWN
+};
+static const char * const core_req_strings[] = {
+	"Dependency Only Job",
+	"Soft Job",
+	"Compute Shader Job",
+	"Fragment Shader Job",
+	"Vertex/Geometry Shader Job",
+	"Tiler Job",
+	"Fragment Shader + Vertex/Geometry Shader Job",
+	"Fragment Shader + Vertex/Geometry Shader Job + Tiler Job",
+	"Fragment Shader + Tiler Job",
+	"Vertex/Geometry Shader Job + Tiler Job",
+	"Unknown Job"
+};
+static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req)
+{
+	if (core_req & BASE_JD_REQ_SOFT_JOB)
+		return core_req_strings[CORE_REQ_SOFT];
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		return core_req_strings[CORE_REQ_COMPUTE];
+	switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) {
+	case BASE_JD_REQ_DEP:
+		return core_req_strings[CORE_REQ_DEP_ONLY];
+	case BASE_JD_REQ_FS:
+		return core_req_strings[CORE_REQ_FRAGMENT];
+	case BASE_JD_REQ_CS:
+		return core_req_strings[CORE_REQ_VERTEX];
+	case BASE_JD_REQ_T:
+		return core_req_strings[CORE_REQ_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_TILER];
+	case (BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_VERTEX_TILER];
+	case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T):
+		return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER];
+	}
+	return core_req_strings[CORE_REQ_UNKNOWN];
+}
+#endif
+
+bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int queued = 0;
+	int i;
+	int sched_prio;
+	bool ret;
+	bool will_fail = false;
+
+	/* Update the TOTAL number of jobs. This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	jctx->job_nr++;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	katom->start_timestamp.tv64 = 0;
+#else
+	katom->start_timestamp = 0;
+#endif
+	katom->udata = user_atom->udata;
+	katom->kctx = kctx;
+	katom->nr_extres = user_atom->nr_extres;
+	katom->extres = NULL;
+	katom->device_nr = user_atom->device_nr;
+	katom->jc = user_atom->jc;
+	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+	katom->core_req = user_atom->core_req;
+	katom->atom_flags = 0;
+	katom->retry_count = 0;
+	katom->need_cache_flush_cores_retained = 0;
+	katom->pre_dep = NULL;
+	katom->post_dep = NULL;
+	katom->x_pre_dep = NULL;
+	katom->x_post_dep = NULL;
+	katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED;
+	katom->softjob_data = NULL;
+
+	/* Implicitly sets katom->protected_state.enter as well. */
+	katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+	katom->age = kctx->age_count++;
+
+	INIT_LIST_HEAD(&katom->jd_item);
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_fence_dep_count_set(katom, -1);
+#endif
+
+	/* Don't do anything if there is a mess up with dependencies.
+	   This is done in a separate cycle to check both the dependencies at ones, otherwise
+	   it will be extra complexity to deal with 1st dependency ( just added to the list )
+	   if only the 2nd one has invalid config.
+	 */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type;
+
+		if (dep_atom_number) {
+			if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom_type != BASE_JD_DEP_TYPE_DATA) {
+				katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+
+				/* Wrong dependency setup. Atom will be sent
+				 * back to user space. Do not record any
+				 * dependencies. */
+				KBASE_TLSTREAM_TL_NEW_ATOM(
+						katom,
+						kbase_jd_atom_id(kctx, katom));
+				KBASE_TLSTREAM_TL_RET_ATOM_CTX(
+						katom, kctx);
+				KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom,
+						TL_ATOM_STATE_IDLE);
+
+				ret = jd_done_nolock(katom, NULL);
+				goto out;
+			}
+		}
+	}
+
+	/* Add dependencies */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type;
+		struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number];
+
+		dep_atom_type = user_atom->pre_dep[i].dependency_type;
+		kbase_jd_katom_dep_clear(&katom->dep[i]);
+
+		if (!dep_atom_number)
+			continue;
+
+		if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED ||
+				dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+
+			if (dep_atom->event_code == BASE_JD_EVENT_DONE)
+				continue;
+			/* don't stop this atom if it has an order dependency
+			 * only to the failed one, try to submit it through
+			 * the normal path
+			 */
+			if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom->event_code > BASE_JD_EVENT_ACTIVE) {
+				continue;
+			}
+
+			/* Atom has completed, propagate the error code if any */
+			katom->event_code = dep_atom->event_code;
+			katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+			/* This atom is going through soft replay or
+			 * will be sent back to user space. Do not record any
+			 * dependencies. */
+			KBASE_TLSTREAM_TL_NEW_ATOM(
+					katom,
+					kbase_jd_atom_id(kctx, katom));
+			KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx);
+			KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom,
+					TL_ATOM_STATE_IDLE);
+
+			if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					 == BASE_JD_REQ_SOFT_REPLAY) {
+				if (kbase_replay_process(katom)) {
+					ret = false;
+					goto out;
+				}
+			}
+			will_fail = true;
+
+		} else {
+			/* Atom is in progress, add this atom to the list */
+			list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]);
+			kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type);
+			queued = 1;
+		}
+	}
+
+	if (will_fail) {
+		if (!queued) {
+			if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+				/* This softjob has failed due to a previous
+				 * dependency, however we should still run the
+				 * prepare & finish functions
+				 */
+				int err = kbase_prepare_soft_job(katom);
+
+				if (err >= 0)
+					kbase_finish_soft_job(katom);
+			}
+
+			ret = jd_done_nolock(katom, NULL);
+
+			goto out;
+		} else {
+
+			if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+				/* This softjob has failed due to a previous
+				 * dependency, however we should still run the
+				 * prepare & finish functions
+				 */
+				if (kbase_prepare_soft_job(katom) != 0) {
+					katom->event_code =
+						BASE_JD_EVENT_JOB_INVALID;
+					ret = jd_done_nolock(katom, NULL);
+					goto out;
+				}
+			}
+
+			katom->will_fail_event_code = katom->event_code;
+			ret = false;
+
+			goto out;
+		}
+	} else {
+		/* These must occur after the above loop to ensure that an atom
+		 * that depends on a previous atom with the same number behaves
+		 * as expected */
+		katom->event_code = BASE_JD_EVENT_DONE;
+		katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	}
+
+	/* For invalid priority, be most lenient and choose the default */
+	sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
+	if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
+		sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
+	katom->sched_priority = sched_prio;
+
+	/* Create a new atom recording all dependencies it was set up with. */
+	KBASE_TLSTREAM_TL_NEW_ATOM(
+			katom,
+			kbase_jd_atom_id(kctx, katom));
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_IDLE);
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(katom, katom->sched_priority);
+	KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx);
+	for (i = 0; i < 2; i++)
+		if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type(
+					&katom->dep[i])) {
+			KBASE_TLSTREAM_TL_DEP_ATOM_ATOM(
+					(void *)kbase_jd_katom_dep_atom(
+						&katom->dep[i]),
+					(void *)katom);
+		} else if (BASE_JD_DEP_TYPE_INVALID !=
+				user_atom->pre_dep[i].dependency_type) {
+			/* Resolved dependency. */
+			int dep_atom_number =
+				user_atom->pre_dep[i].atom_id;
+			struct kbase_jd_atom *dep_atom =
+				&jctx->atoms[dep_atom_number];
+
+			KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM(
+					(void *)dep_atom,
+					(void *)katom);
+		}
+
+	/* Reject atoms with job chain = NULL, as these cause issues with soft-stop */
+	if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with an invalid device_nr */
+	if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) &&
+	    (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid device_nr %d",
+				katom->device_nr);
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject atoms with invalid core requirements */
+	if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) &&
+			(katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid core requirements");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	/* Reject soft-job atom of certain types from accessing external resources */
+	if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) &&
+			(((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT) ||
+			 ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_ALLOC) ||
+			 ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_FREE))) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting soft-job atom accessing external resources");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
+	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		/* handle what we need to do to access the external resources */
+		if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
+			/* setup failed (no access, bad resource, unknown resource types, etc.) */
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+	/* Validate the atom. Function will return error if the atom is
+	 * malformed.
+	 *
+	 * Soft-jobs never enter the job scheduler but have their own initialize method.
+	 *
+	 * If either fail then we immediately complete the atom with an error.
+	 */
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) {
+		if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	} else {
+		/* Soft-job */
+		if (kbase_prepare_soft_job(katom) != 0) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+	}
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+	katom->work_id = atomic_inc_return(&jctx->work_id);
+	trace_gpu_job_enqueue(kctx->id, katom->work_id,
+			kbasep_map_core_reqs_to_string(katom->core_req));
+#endif
+
+	if (queued && !IS_GPU_ATOM(katom)) {
+		ret = false;
+		goto out;
+	}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (kbase_fence_dep_count_read(katom) != -1) {
+		ret = false;
+		goto out;
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+						  == BASE_JD_REQ_SOFT_REPLAY) {
+		if (kbase_replay_process(katom))
+			ret = false;
+		else
+			ret = jd_done_nolock(katom, NULL);
+
+		goto out;
+	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		if (kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			ret = jd_done_nolock(katom, NULL);
+			goto out;
+		}
+
+		ret = false;
+	} else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		ret = kbasep_js_add_job(kctx, katom);
+		/* If job was cancelled then resolve immediately */
+		if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED)
+			ret = jd_done_nolock(katom, NULL);
+	} else {
+		/* This is a pure dependency. Resolve it immediately */
+		ret = jd_done_nolock(katom, NULL);
+	}
+
+ out:
+	return ret;
+}
+
+int kbase_jd_submit(struct kbase_context *kctx,
+		void __user *user_addr, u32 nr_atoms, u32 stride,
+		bool uk6_atom)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int err = 0;
+	int i;
+	bool need_to_try_schedule_context = false;
+	struct kbase_device *kbdev;
+	u32 latest_flush;
+
+	/*
+	 * kbase_jd_submit isn't expected to fail and so all errors with the
+	 * jobs are reported by immediately failing them (through event system)
+	 */
+	kbdev = kctx->kbdev;
+
+	beenthere(kctx, "%s", "Enter");
+
+	if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it");
+		return -EINVAL;
+	}
+
+	if (stride != sizeof(base_jd_atom_v2)) {
+		dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel");
+		return -EINVAL;
+	}
+
+	/* All atoms submitted in this call have the same flush ID */
+	latest_flush = kbase_backend_get_current_flush_id(kbdev);
+
+	for (i = 0; i < nr_atoms; i++) {
+		struct base_jd_atom_v2 user_atom;
+		struct kbase_jd_atom *katom;
+
+		if (copy_from_user(&user_atom, user_addr,
+					sizeof(user_atom)) != 0) {
+			err = -EINVAL;
+			break;
+		}
+
+		user_addr = (void __user *)((uintptr_t) user_addr + stride);
+
+		mutex_lock(&jctx->lock);
+#ifndef compiletime_assert
+#define compiletime_assert_defined
+#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \
+while (false)
+#endif
+		compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) ==
+					BASE_JD_ATOM_COUNT,
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+		compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) ==
+					sizeof(user_atom.atom_number),
+			"BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+#ifdef compiletime_assert_defined
+#undef compiletime_assert
+#undef compiletime_assert_defined
+#endif
+		katom = &jctx->atoms[user_atom.atom_number];
+
+		/* Record the flush ID for the cache flush optimisation */
+		katom->flush_id = latest_flush;
+
+		while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) {
+			/* Atom number is already in use, wait for the atom to
+			 * complete
+			 */
+			mutex_unlock(&jctx->lock);
+
+			/* This thread will wait for the atom to complete. Due
+			 * to thread scheduling we are not sure that the other
+			 * thread that owns the atom will also schedule the
+			 * context, so we force the scheduler to be active and
+			 * hence eventually schedule this context at some point
+			 * later.
+			 */
+			kbase_js_sched_all(kbdev);
+
+			if (wait_event_killable(katom->completed,
+					katom->status ==
+					KBASE_JD_ATOM_STATE_UNUSED) != 0) {
+				/* We're being killed so the result code
+				 * doesn't really matter
+				 */
+				return 0;
+			}
+			mutex_lock(&jctx->lock);
+		}
+
+		need_to_try_schedule_context |=
+				       jd_submit_atom(kctx, &user_atom, katom);
+
+		/* Register a completed job as a disjoint event when the GPU is in a disjoint state
+		 * (ie. being reset or replaying jobs).
+		 */
+		kbase_disjoint_event_potential(kbdev);
+
+		mutex_unlock(&jctx->lock);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kbdev);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_submit);
+
+void kbase_jd_done_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+	u64 cache_jc = katom->jc;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	bool context_idle;
+	base_jd_core_req core_req = katom->core_req;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+	kbdev = kctx->kbdev;
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	/*
+	 * Begin transaction on JD context and JS context
+	 */
+	mutex_lock(&jctx->lock);
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_DONE);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* This worker only gets called on contexts that are scheduled *in*. This is
+	 * because it only happens in response to an IRQ from a job that was
+	 * running.
+	 */
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (katom->event_code == BASE_JD_EVENT_STOPPED) {
+		/* Atom has been promoted to stopped */
+		unsigned long flags;
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+		kbase_js_unpull(kctx, katom);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&jctx->lock);
+
+		return;
+	}
+
+	if ((katom->event_code != BASE_JD_EVENT_DONE) &&
+			(!kbase_ctx_flag(katom->kctx, KCTX_DYING)))
+		dev_err(kbdev->dev,
+			"t6xx: GPU fault 0x%02lx from job slot %d\n",
+					(unsigned long)katom->event_code,
+								katom->slot_nr);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	/* Retain state before the katom disappears */
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+
+	context_idle = kbase_js_complete_atom_wq(kctx, katom);
+
+	KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state));
+
+	kbasep_js_remove_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	/* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
+	jd_done_nolock(katom, &kctx->completed_jobs);
+
+	/* katom may have been freed now, do not use! */
+
+	if (context_idle) {
+		unsigned long flags;
+
+		context_idle = false;
+		mutex_lock(&js_devdata->queue_mutex);
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		/* If kbase_sched() has scheduled this context back in then
+		 * KCTX_ACTIVE will have been set after we marked it as
+		 * inactive, and another pm reference will have been taken, so
+		 * drop our reference. But do not call kbase_jm_idle_ctx(), as
+		 * the context is active and fast-starting is allowed.
+		 *
+		 * If an atom has been fast-started then kctx->atoms_pulled will
+		 * be non-zero but KCTX_ACTIVE will still be false (as the
+		 * previous pm reference has been inherited). Do NOT drop our
+		 * reference, as it has been re-used, and leave the context as
+		 * active.
+		 *
+		 * If no new atoms have been started then KCTX_ACTIVE will still
+		 * be false and atoms_pulled will be zero, so drop the reference
+		 * and call kbase_jm_idle_ctx().
+		 *
+		 * As the checks are done under both the queue_mutex and
+		 * hwaccess_lock is should be impossible for this to race
+		 * with the scheduler code.
+		 */
+		if (kbase_ctx_flag(kctx, KCTX_ACTIVE) ||
+		    !atomic_read(&kctx->atoms_pulled)) {
+			/* Calling kbase_jm_idle_ctx() here will ensure that
+			 * atoms are not fast-started when we drop the
+			 * hwaccess_lock. This is not performed if
+			 * KCTX_ACTIVE is set as in that case another pm
+			 * reference has been taken and a fast-start would be
+			 * valid.
+			 */
+			if (!kbase_ctx_flag(kctx, KCTX_ACTIVE))
+				kbase_jm_idle_ctx(kbdev, kctx);
+			context_idle = true;
+		} else {
+			kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
+		}
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+
+	/*
+	 * Transaction complete
+	 */
+	mutex_unlock(&jctx->lock);
+
+	/* Job is now no longer running, so can now safely release the context
+	 * reference, and handle any actions that were logged against the atom's retained state */
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	if (!atomic_dec_return(&kctx->work_count)) {
+		/* If worker now idle then post all events that jd_done_nolock()
+		 * has queued */
+		mutex_lock(&jctx->lock);
+		while (!list_empty(&kctx->completed_jobs)) {
+			struct kbase_jd_atom *atom = list_entry(
+					kctx->completed_jobs.next,
+					struct kbase_jd_atom, jd_item);
+			list_del(kctx->completed_jobs.next);
+
+			kbase_event_post(kctx, atom);
+		}
+		mutex_unlock(&jctx->lock);
+	}
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, coreref_state);
+
+	if (context_idle)
+		kbase_pm_context_idle(kbdev);
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
+}
+
+/**
+ * jd_cancel_worker - Work queue job cancel function.
+ * @data: a &struct work_struct
+ *
+ * Only called as part of 'Zapping' a context (which occurs on termination).
+ * Operates serially with the kbase_jd_done_worker() on the work queue.
+ *
+ * This can only be called on contexts that aren't scheduled.
+ *
+ * We don't need to release most of the resources that would occur on
+ * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be
+ * running (by virtue of only being called on contexts that aren't
+ * scheduled).
+ */
+static void jd_cancel_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+	struct kbase_jd_context *jctx;
+	struct kbase_context *kctx;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool need_to_try_schedule_context;
+	bool attr_state_changed;
+	struct kbase_device *kbdev;
+
+	/* Soft jobs should never reach this function */
+	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+	kctx = katom->kctx;
+	kbdev = kctx->kbdev;
+	jctx = &kctx->jctx;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0);
+
+	/* This only gets called on contexts that are scheduled out. Hence, we must
+	 * make sure we don't de-ref the number of running jobs (there aren't
+	 * any), nor must we try to schedule out the context (it's already
+	 * scheduled out).
+	 */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	/* Scheduler: Remove the job from the system */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&jctx->lock);
+
+	need_to_try_schedule_context = jd_done_nolock(katom, NULL);
+	/* Because we're zapping, we're not adding any more jobs to this ctx, so no need to
+	 * schedule the context. There's also no need for the jsctx_mutex to have been taken
+	 * around this too. */
+	KBASE_DEBUG_ASSERT(!need_to_try_schedule_context);
+
+	/* katom may have been freed now, do not use! */
+	mutex_unlock(&jctx->lock);
+
+	if (attr_state_changed)
+		kbase_js_sched_all(kbdev);
+}
+
+/**
+ * kbase_jd_done - Complete a job that has been removed from the Hardware
+ * @katom: atom which has been completed
+ * @slot_nr: slot the atom was on
+ * @end_timestamp: completion time
+ * @done_code: completion code
+ *
+ * This must be used whenever a job has been removed from the Hardware, e.g.:
+ * An IRQ indicates that the job finished (for both error and 'done' codes), or
+ * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop.
+ *
+ * Some work is carried out immediately, and the rest is deferred onto a
+ * workqueue
+ *
+ * Context:
+ *   This can be called safely from atomic context.
+ *   The caller must hold kbdev->hwaccess_lock
+ */
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
+		ktime_t *end_timestamp, kbasep_js_atom_done_code done_code)
+{
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(kctx);
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT)
+		katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+
+	KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0);
+
+	kbase_job_check_leave_disjoint(kbdev, katom);
+
+	katom->slot_nr = slot_nr;
+
+	atomic_inc(&kctx->work_count);
+
+#ifdef CONFIG_DEBUG_FS
+	/* a failed job happened and is waiting for dumping*/
+	if (!katom->will_fail_event_code &&
+			kbase_debug_job_fault_process(katom, katom->event_code))
+		return;
+#endif
+
+	WARN_ON(work_pending(&katom->work));
+	INIT_WORK(&katom->work, kbase_jd_done_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_done);
+
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	KBASE_DEBUG_ASSERT(NULL != katom);
+	kctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
+
+	/* This should only be done from a context that is not scheduled */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	WARN_ON(work_pending(&katom->work));
+
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	INIT_WORK(&katom->work, jd_cancel_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+
+void kbase_jd_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_jd_atom *katom;
+	struct list_head *entry, *tmp;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kbdev = kctx->kbdev;
+
+	KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u);
+
+	kbase_js_zap_context(kctx);
+
+	mutex_lock(&kctx->jctx.lock);
+
+	/*
+	 * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are
+	 * queued outside the job scheduler.
+	 */
+
+	del_timer_sync(&kctx->soft_job_timeout);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		katom = list_entry(entry, struct kbase_jd_atom, queue);
+		kbase_cancel_soft_job(katom);
+	}
+
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_cancel_all_atoms(kctx);
+#endif
+
+	mutex_unlock(&kctx->jctx.lock);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 */
+	flush_workqueue(kctx->dma_fence.wq);
+#endif
+
+	kbase_jm_wait_for_zero_jobs(kctx);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_zap_context);
+
+int kbase_jd_init(struct kbase_context *kctx)
+{
+	int i;
+	int mali_err = 0;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kctx->jctx.job_done_wq = alloc_workqueue("mali_jd",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (NULL == kctx->jctx.job_done_wq) {
+		mali_err = -ENOMEM;
+		goto out1;
+	}
+
+	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+		init_waitqueue_head(&kctx->jctx.atoms[i].completed);
+
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]);
+
+		/* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */
+		kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID;
+		kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED;
+
+#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
+		kctx->jctx.atoms[i].dma_fence.context =
+						dma_fence_context_alloc(1);
+		atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks);
+#endif
+	}
+
+	mutex_init(&kctx->jctx.lock);
+
+	init_waitqueue_head(&kctx->jctx.zero_jobs_wait);
+
+	spin_lock_init(&kctx->jctx.tb_lock);
+
+	kctx->jctx.job_nr = 0;
+	INIT_LIST_HEAD(&kctx->completed_jobs);
+	atomic_set(&kctx->work_count, 0);
+
+	return 0;
+
+ out1:
+	return mali_err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_init);
+
+void kbase_jd_exit(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	/* Work queue is emptied by this */
+	destroy_workqueue(kctx->jctx.job_done_wq);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_exit);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
new file mode 100644
index 0000000000000..271daef142264
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
@@ -0,0 +1,241 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifdef CONFIG_DEBUG_FS
+
+#include <linux/seq_file.h>
+#include <mali_kbase.h>
+#include <mali_kbase_jd_debugfs.h>
+#include <mali_kbase_dma_fence.h>
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#include <mali_kbase_sync.h>
+#endif
+#include <mali_kbase_ioctl.h>
+
+struct kbase_jd_debugfs_depinfo {
+	u8 id;
+	char type;
+};
+
+static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom,
+					struct seq_file *sfile)
+{
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	struct kbase_sync_fence_info info;
+	int res;
+
+	switch (atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		res = kbase_sync_fence_out_info_get(atom, &info);
+		if (0 == res) {
+			seq_printf(sfile, "Sa([%p]%d) ",
+				   info.fence, info.status);
+			break;
+		}
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		res = kbase_sync_fence_in_info_get(atom, &info);
+		if (0 == res) {
+			seq_printf(sfile, "Wa([%p]%d) ",
+				   info.fence, info.status);
+			break;
+		}
+	default:
+		break;
+	}
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+		struct kbase_fence_cb *cb;
+
+		if (atom->dma_fence.fence) {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+			struct fence *fence = atom->dma_fence.fence;
+#else
+			struct dma_fence *fence = atom->dma_fence.fence;
+#endif
+
+			seq_printf(sfile,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+					"Sd(%u#%u: %s) ",
+#else
+					"Sd(%llu#%u: %s) ",
+#endif
+					fence->context,
+					fence->seqno,
+					dma_fence_is_signaled(fence) ?
+						"signaled" : "active");
+		}
+
+		list_for_each_entry(cb, &atom->dma_fence.callbacks,
+				    node) {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+			struct fence *fence = cb->fence;
+#else
+			struct dma_fence *fence = cb->fence;
+#endif
+
+			seq_printf(sfile,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+					"Wd(%u#%u: %s) ",
+#else
+					"Wd(%llu#%u: %s) ",
+#endif
+					fence->context,
+					fence->seqno,
+					dma_fence_is_signaled(fence) ?
+						"signaled" : "active");
+		}
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+}
+
+static void kbasep_jd_debugfs_atom_deps(
+		struct kbase_jd_debugfs_depinfo *deps,
+		struct kbase_jd_atom *atom)
+{
+	struct kbase_context *kctx = atom->kctx;
+	int i;
+
+	for (i = 0; i < 2; i++)	{
+		deps[i].id = (unsigned)(atom->dep[i].atom ?
+				kbase_jd_atom_id(kctx, atom->dep[i].atom) : 0);
+
+		switch (atom->dep[i].dep_type) {
+		case BASE_JD_DEP_TYPE_INVALID:
+			deps[i].type = ' ';
+			break;
+		case BASE_JD_DEP_TYPE_DATA:
+			deps[i].type = 'D';
+			break;
+		case BASE_JD_DEP_TYPE_ORDER:
+			deps[i].type = '>';
+			break;
+		default:
+			deps[i].type = '?';
+			break;
+		}
+	}
+}
+/**
+ * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file.
+ * @sfile: The debugfs entry
+ * @data:  Data associated with the entry
+ *
+ * This function is called to get the contents of the JD atoms debugfs file.
+ * This is a report of all atoms managed by kbase_jd_context.atoms
+ *
+ * Return: 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+	struct kbase_jd_atom *atoms;
+	unsigned long irq_flags;
+	int i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Print version */
+	seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION);
+
+	/* Print U/K API version */
+	seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR,
+			BASE_UK_VERSION_MINOR);
+
+	/* Print table heading */
+	seq_puts(sfile, " ID, Core req, St, CR,   Predeps,           Start time, Additional info...\n");
+
+	atoms = kctx->jctx.atoms;
+	/* General atom states */
+	mutex_lock(&kctx->jctx.lock);
+	/* JS-related states */
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+	for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) {
+		struct kbase_jd_atom *atom = &atoms[i];
+		s64 start_timestamp = 0;
+		struct kbase_jd_debugfs_depinfo deps[2];
+
+		if (atom->status == KBASE_JD_ATOM_STATE_UNUSED)
+			continue;
+
+		/* start_timestamp is cleared as soon as the atom leaves UNUSED state
+		 * and set before a job is submitted to the h/w, a non-zero value means
+		 * it is valid */
+		if (ktime_to_ns(atom->start_timestamp))
+			start_timestamp = ktime_to_ns(
+					ktime_sub(ktime_get(), atom->start_timestamp));
+
+		kbasep_jd_debugfs_atom_deps(deps, atom);
+
+		seq_printf(sfile,
+				"%3u, %8x, %2u, %2u, %c%3u %c%3u, %20lld, ",
+				i, atom->core_req, atom->status,
+				atom->coreref_state,
+				deps[0].type, deps[0].id,
+				deps[1].type, deps[1].id,
+				start_timestamp);
+
+
+		kbase_jd_debugfs_fence_info(atom, sfile);
+
+		seq_puts(sfile, "\n");
+	}
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	mutex_unlock(&kctx->jctx.lock);
+
+	return 0;
+}
+
+
+/**
+ * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * Return: file descriptor
+ */
+static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private);
+}
+
+static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
+	.open = kbasep_jd_debugfs_atoms_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* Expose all atoms */
+	debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx,
+			&kbasep_jd_debugfs_atoms_fops);
+
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
new file mode 100644
index 0000000000000..ce0cb61f8c279
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file mali_kbase_jd_debugfs.h
+ * Header file for job dispatcher-related entries in debugfs
+ */
+
+#ifndef _KBASE_JD_DEBUGFS_H
+#define _KBASE_JD_DEBUGFS_H
+
+#include <linux/debugfs.h>
+
+#define MALI_JD_DEBUGFS_VERSION 2
+
+/* Forward declarations */
+struct kbase_context;
+
+/**
+ * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system
+ *
+ * @kctx Pointer to kbase_context
+ */
+void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx);
+
+#endif  /*_KBASE_JD_DEBUGFS_H*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.c b/drivers/gpu/arm/midgard/mali_kbase_jm.c
new file mode 100644
index 0000000000000..da78a1670d9b8
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jm.c
@@ -0,0 +1,140 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_hwaccess_jm.h"
+#include "mali_kbase_jm.h"
+
+/**
+ * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot
+ *			 @js on the active context.
+ * @kbdev:		Device pointer
+ * @js:			Job slot to run on
+ * @nr_jobs_to_submit:	Number of jobs to attempt to submit
+ *
+ * Return: true if slot can still be submitted on, false if slot is now full.
+ */
+static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
+				int nr_jobs_to_submit)
+{
+	struct kbase_context *kctx;
+	int i;
+
+	kctx = kbdev->hwaccess.active_kctx[js];
+
+	if (!kctx)
+		return true;
+
+	for (i = 0; i < nr_jobs_to_submit; i++) {
+		struct kbase_jd_atom *katom = kbase_js_pull(kctx, js);
+
+		if (!katom)
+			return true; /* Context has no jobs on this slot */
+
+		kbase_backend_run_atom(kbdev, katom);
+	}
+
+	return false; /* Slot ringbuffer should now be full */
+}
+
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	u32 ret_mask = 0;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	while (js_mask) {
+		int js = ffs(js_mask) - 1;
+		int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js);
+
+		if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit))
+			ret_mask |= (1 << js);
+
+		js_mask &= ~(1 << js);
+	}
+
+	return ret_mask;
+}
+
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick(kbdev, js_mask);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_try_kick_all(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!down_trylock(&js_devdata->schedule_sem)) {
+		kbase_jm_kick_all(kbdev);
+		up(&js_devdata->schedule_sem);
+	}
+}
+
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		if (kbdev->hwaccess.active_kctx[js] == kctx)
+			kbdev->hwaccess.active_kctx[js] = NULL;
+	}
+}
+
+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (katom->event_code != BASE_JD_EVENT_STOPPED &&
+			katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) {
+		return kbase_js_complete_atom(katom, NULL);
+	} else {
+		kbase_js_unpull(katom->kctx, katom);
+		return NULL;
+	}
+}
+
+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, ktime_t *end_timestamp)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	return kbase_js_complete_atom(katom, end_timestamp);
+}
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.h b/drivers/gpu/arm/midgard/mali_kbase_jm.h
new file mode 100644
index 0000000000000..c468ea4d20a54
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jm.h
@@ -0,0 +1,115 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+/*
+ * Job manager common APIs
+ */
+
+#ifndef _KBASE_JM_H_
+#define _KBASE_JM_H_
+
+/**
+ * kbase_jm_kick() - Indicate that there are jobs ready to run.
+ * @kbdev:	Device pointer
+ * @js_mask:	Mask of the job slots that can be pulled from.
+ *
+ * Caller must hold the hwaccess_lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job
+ *			 slots.
+ * @kbdev:	Device pointer
+ *
+ * Caller must hold the hwaccess_lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev)
+{
+	return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+/**
+ * kbase_jm_try_kick - Attempt to call kbase_jm_kick
+ * @kbdev:   Device pointer
+ * @js_mask: Mask of the job slots that can be pulled from
+ * Context: Caller must hold hwaccess_lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all
+ * @kbdev:  Device pointer
+ * Context: Caller must hold hwaccess_lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick_all() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick_all(struct kbase_device *kbdev);
+
+/**
+ * kbase_jm_idle_ctx() - Mark a context as idle.
+ * @kbdev:	Device pointer
+ * @kctx:	Context to mark as idle
+ *
+ * No more atoms will be pulled from this context until it is marked as active
+ * by kbase_js_use_ctx().
+ *
+ * The context should have no atoms currently pulled from it
+ * (kctx->atoms_pulled == 0).
+ *
+ * Caller must hold the hwaccess_lock
+ */
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has
+ *				  been soft-stopped or will fail due to a
+ *				  dependency
+ * @kbdev:	Device pointer
+ * @katom:	Atom that has been stopped or will be failed
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+			struct kbase_jd_atom *katom);
+
+/**
+ * kbase_jm_complete() - Complete an atom
+ * @kbdev:		Device pointer
+ * @katom:		Atom that has completed
+ * @end_timestamp:	Timestamp of atom completion
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
+		struct kbase_jd_atom *katom, ktime_t *end_timestamp);
+
+#endif /* _KBASE_JM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c
new file mode 100644
index 0000000000000..66a84447c60ec
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js.c
@@ -0,0 +1,2894 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Job Scheduler Implementation
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_ctx_sched.h>
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_jm.h"
+#include "mali_kbase_hwaccess_jm.h"
+
+/*
+ * Private types
+ */
+
+/* Bitpattern indicating the result of releasing a context */
+enum {
+	/* The context was descheduled - caller should try scheduling in a new
+	 * one to keep the runpool full */
+	KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0),
+	/* Ctx attributes were changed - caller should try scheduling all
+	 * contexts */
+	KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1)
+};
+
+typedef u32 kbasep_js_release_result;
+
+const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = {
+	KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */
+	KBASE_JS_ATOM_SCHED_PRIO_LOW  /* BASE_JD_PRIO_LOW */
+};
+
+const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = {
+	BASE_JD_PRIO_HIGH,   /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */
+	BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */
+	BASE_JD_PRIO_LOW     /* KBASE_JS_ATOM_SCHED_PRIO_LOW */
+};
+
+
+/*
+ * Private function prototypes
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state);
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_ctx_job_cb callback);
+
+/* Helper for trace subcodes */
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	return atomic_read(&kctx->refcount);
+}
+#else				/* KBASE_TRACE_ENABLE  */
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(kctx);
+	return 0;
+}
+#endif				/* KBASE_TRACE_ENABLE  */
+
+/*
+ * Private functions
+ */
+
+/**
+ * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements
+ * @features: JSn_FEATURE register value
+ *
+ * Given a JSn_FEATURE register value returns the core requirements that match
+ *
+ * Return: Core requirement bit mask
+ */
+static base_jd_core_req core_reqs_from_jsn_features(u16 features)
+{
+	base_jd_core_req core_req = 0u;
+
+	if ((features & JS_FEATURE_SET_VALUE_JOB) != 0)
+		core_req |= BASE_JD_REQ_V;
+
+	if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0)
+		core_req |= BASE_JD_REQ_CF;
+
+	if ((features & JS_FEATURE_COMPUTE_JOB) != 0)
+		core_req |= BASE_JD_REQ_CS;
+
+	if ((features & JS_FEATURE_TILER_JOB) != 0)
+		core_req |= BASE_JD_REQ_T;
+
+	if ((features & JS_FEATURE_FRAGMENT_JOB) != 0)
+		core_req |= BASE_JD_REQ_FS;
+
+	return core_req;
+}
+
+static void kbase_js_sync_timers(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->js_data.runpool_mutex);
+	kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&kbdev->js_data.runpool_mutex);
+}
+
+/* Hold the mmu_hw_mutex and hwaccess_lock for this */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	bool result = false;
+	int as_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	as_nr = kctx->as_nr;
+	if (atomic_read(&kctx->refcount) > 0) {
+		KBASE_DEBUG_ASSERT(as_nr >= 0);
+
+		kbase_ctx_sched_retain_ctx_refcount(kctx);
+		KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx,
+				NULL, 0u, atomic_read(&kctx->refcount));
+		result = true;
+	}
+
+	return result;
+}
+
+/**
+ * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority to check.
+ *
+ * Return true if there are no atoms to pull. There may be running atoms in the
+ * ring buffer even if there are no atoms to pull. It is also possible for the
+ * ring buffer to be full (with running atoms) when this functions returns
+ * true.
+ *
+ * Return: true if there are no atoms to pull, false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	return RB_EMPTY_ROOT(&rb->runnable_tree);
+}
+
+/**
+ * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no
+ * pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if the ring buffers for all priorities have no pullable atoms,
+ *	   false otherwise.
+ */
+static inline bool
+jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		if (!jsctx_rb_none_to_pull_prio(kctx, js, prio))
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue.
+ * @kctx:     Pointer to kbase context with the queue.
+ * @js:       Job slot id to iterate.
+ * @prio:     Priority id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over a queue and invoke @callback for each entry in the queue, and
+ * remove the entry from the queue.
+ *
+ * If entries are added to the queue while this is running those entries may, or
+ * may not be covered. To ensure that all entries in the buffer have been
+ * enumerated when this function returns jsctx->lock must be held when calling
+ * this function.
+ *
+ * The HW access lock must always be held when calling this function.
+ */
+static void
+jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
+		kbasep_js_ctx_job_cb callback)
+{
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	while (!RB_EMPTY_ROOT(&queue->runnable_tree)) {
+		struct rb_node *node = rb_first(&queue->runnable_tree);
+		struct kbase_jd_atom *entry = rb_entry(node,
+				struct kbase_jd_atom, runnable_tree_node);
+
+		rb_erase(node, &queue->runnable_tree);
+		callback(kctx->kbdev, entry);
+	}
+
+	while (!list_empty(&queue->x_dep_head)) {
+		struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next,
+				struct kbase_jd_atom, queue);
+
+		list_del(queue->x_dep_head.next);
+
+		callback(kctx->kbdev, entry);
+	}
+}
+
+/**
+ * jsctx_queue_foreach(): - Execute callback for each entry in every queue
+ * @kctx:     Pointer to kbase context with queue.
+ * @js:       Job slot id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over all the different priorities, and for each call
+ * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback
+ * for each entry, and remove the entry from the queue.
+ */
+static inline void
+jsctx_queue_foreach(struct kbase_context *kctx, int js,
+		kbasep_js_ctx_job_cb callback)
+{
+	int prio;
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
+		jsctx_queue_foreach_prio(kctx, js, prio, callback);
+}
+
+/**
+ * jsctx_rb_peek_prio(): - Check buffer and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ * @prio: Priority id to check.
+ *
+ * Check the ring buffer for the specified @js and @prio and return a pointer to
+ * the next atom, unless the ring buffer is empty.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
+{
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+	struct rb_node *node;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	node = rb_first(&rb->runnable_tree);
+	if (!node)
+		return NULL;
+
+	return rb_entry(node, struct kbase_jd_atom, runnable_tree_node);
+}
+
+/**
+ * jsctx_rb_peek(): - Check all priority buffers and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js:   Job slot id to check.
+ *
+ * Check the ring buffers for all priorities, starting from
+ * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a
+ * pointer to the next atom, unless all the priority's ring buffers are empty.
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek(struct kbase_context *kctx, int js)
+{
+	int prio;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+		struct kbase_jd_atom *katom;
+
+		katom = jsctx_rb_peek_prio(kctx, js, prio);
+		if (katom)
+			return katom;
+	}
+
+	return NULL;
+}
+
+/**
+ * jsctx_rb_pull(): - Mark atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to pull.
+ *
+ * Mark an atom previously obtained from jsctx_rb_peek() as running.
+ *
+ * @katom must currently be at the head of the ring buffer.
+ */
+static inline void
+jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	/* Atoms must be pulled in the correct order. */
+	WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio));
+
+	rb_erase(&katom->runnable_tree_node, &rb->runnable_tree);
+}
+
+#define LESS_THAN_WRAP(a, b) ((s32)(a - b) < 0)
+
+static void
+jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	int prio = katom->sched_priority;
+	int js = katom->slot_nr;
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+	struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	while (*new) {
+		struct kbase_jd_atom *entry = container_of(*new,
+				struct kbase_jd_atom, runnable_tree_node);
+
+		parent = *new;
+		if (LESS_THAN_WRAP(katom->age, entry->age))
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&katom->runnable_tree_node, parent, new);
+	rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree);
+}
+
+/**
+ * jsctx_rb_unpull(): - Undo marking of atom in list as running
+ * @kctx:  Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to unpull.
+ *
+ * Undo jsctx_rb_pull() and put @katom back in the queue.
+ *
+ * jsctx_rb_unpull() must be called on atoms in the same order the atoms were
+ * pulled.
+ */
+static inline void
+jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	jsctx_tree_add(kctx, katom);
+}
+
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx,
+					int js,
+					bool is_scheduled);
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+
+/*
+ * Functions private to KBase ('Protected' functions)
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev)
+{
+	struct kbasep_js_device_data *jsdd;
+	int i, j;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	jsdd = &kbdev->js_data;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* Soft-stop will be disabled on a single context by default unless
+	 * softstop_always is set */
+	jsdd->softstop_always = false;
+#endif				/* CONFIG_MALI_DEBUG */
+	jsdd->nr_all_contexts_running = 0;
+	jsdd->nr_user_contexts_running = 0;
+	jsdd->nr_contexts_pullable = 0;
+	atomic_set(&jsdd->nr_contexts_runnable, 0);
+	/* No ctx allowed to submit */
+	jsdd->runpool_irq.submit_allowed = 0u;
+	memset(jsdd->runpool_irq.ctx_attr_ref_count, 0,
+			sizeof(jsdd->runpool_irq.ctx_attr_ref_count));
+	memset(jsdd->runpool_irq.slot_affinities, 0,
+			sizeof(jsdd->runpool_irq.slot_affinities));
+	memset(jsdd->runpool_irq.slot_affinity_refcount, 0,
+			sizeof(jsdd->runpool_irq.slot_affinity_refcount));
+	INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list);
+
+	/* Config attributes */
+	jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS;
+	jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS;
+	jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS_8408;
+	else
+		jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS;
+	jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL;
+	jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS_8408;
+	else
+		jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
+	jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
+	jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING;
+	jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS;
+	atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT);
+
+	dev_dbg(kbdev->dev, "JS Config Attribs: ");
+	dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
+			jsdd->scheduling_period_ns);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u",
+			jsdd->soft_stop_ticks);
+	dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u",
+			jsdd->soft_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u",
+			jsdd->hard_stop_ticks_ss);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u",
+			jsdd->hard_stop_ticks_cl);
+	dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u",
+			jsdd->hard_stop_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u",
+			jsdd->gpu_reset_ticks_ss);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u",
+			jsdd->gpu_reset_ticks_cl);
+	dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u",
+			jsdd->gpu_reset_ticks_dumping);
+	dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u",
+			jsdd->ctx_timeslice_ns);
+	dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i",
+		atomic_read(&jsdd->soft_job_timeout_ms));
+
+	if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
+			jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&
+			jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping &&
+			jsdd->hard_stop_ticks_dumping <
+			jsdd->gpu_reset_ticks_dumping)) {
+		dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n");
+		return -EINVAL;
+	}
+
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.",
+			jsdd->soft_stop_ticks,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Job Scheduling Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.",
+			jsdd->hard_stop_ticks_ss,
+			jsdd->hard_stop_ticks_dumping,
+			jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS
+	dev_dbg(kbdev->dev, "Note: The JS tick timer (if coded) will still be run, but do nothing.");
+#endif
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i)
+		jsdd->js_reqs[i] = core_reqs_from_jsn_features(
+			kbdev->gpu_props.props.raw_props.js_features[i]);
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+
+	mutex_init(&jsdd->runpool_mutex);
+	mutex_init(&jsdd->queue_mutex);
+	spin_lock_init(&kbdev->hwaccess_lock);
+	sema_init(&jsdd->schedule_sem, 1);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) {
+		for (j = 0; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) {
+			INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i][j]);
+			INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i][j]);
+		}
+	}
+
+	return 0;
+}
+
+void kbasep_js_devdata_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbasep_js_devdata_term(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, };
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_devdata = &kbdev->js_data;
+
+	/* The caller must de-register all contexts before calling this
+	 */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0);
+	KBASE_DEBUG_ASSERT(memcmp(
+				  js_devdata->runpool_irq.ctx_attr_ref_count,
+				  zero_ctx_attr_ref_count,
+				  sizeof(zero_ctx_attr_ref_count)) == 0);
+	CSTD_UNUSED(zero_ctx_attr_ref_count);
+}
+
+int kbasep_js_kctx_init(struct kbase_context *const kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	int i, j;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+		INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	js_kctx_info->ctx.nr_jobs = 0;
+	kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+	kbase_ctx_flag_clear(kctx, KCTX_DYING);
+	memset(js_kctx_info->ctx.ctx_attr_ref_count, 0,
+			sizeof(js_kctx_info->ctx.ctx_attr_ref_count));
+
+	/* Initially, the context is disabled from submission until the create
+	 * flags are set */
+	kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED);
+
+	/* On error, we could continue on: providing none of the below resources
+	 * rely on the ones above */
+	mutex_init(&js_kctx_info->ctx.jsctx_mutex);
+
+	init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait);
+
+	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+		for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) {
+			INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head);
+			kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT;
+		}
+	}
+
+	return 0;
+}
+
+void kbasep_js_kctx_term(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	int js;
+	bool update_ctx_count = false;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	kbdev = kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* The caller must de-register all jobs before calling this */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0);
+
+	mutex_lock(&kbdev->js_data.queue_mutex);
+	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) {
+		WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0);
+		atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		update_ctx_count = true;
+		kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+	}
+
+	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	mutex_unlock(&kbdev->js_data.queue_mutex);
+
+	if (update_ctx_count) {
+		mutex_lock(&kbdev->js_data.runpool_mutex);
+		kbase_backend_ctx_count_changed(kbdev);
+		mutex_unlock(&kbdev->js_data.runpool_mutex);
+	}
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_nolock - Variant of
+ *                                         kbase_jd_ctx_list_add_pullable()
+ *                                         where the caller must hold
+ *                                         hwaccess_lock
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+			&kbdev->js_data.ctx_list_pullable[js][kctx->priority]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head_nolock - Variant of
+ *                                              kbase_js_ctx_list_add_pullable_head()
+ *                                              where the caller must hold
+ *                                              hwaccess_lock
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head_nolock(
+		struct kbase_device *kbdev, struct kbase_context *kctx, int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+			&kbdev->js_data.ctx_list_pullable[js][kctx->priority]);
+
+	if (!kctx->slots_pullable) {
+		kbdev->js_data.nr_contexts_pullable++;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable |= (1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head - Add context to the head of the
+ *                                       per-slot pullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * If the context is on either the pullable or unpullable queues, then it is
+ * removed before being added to the head.
+ *
+ * This function should be used when a context has been scheduled, but no jobs
+ * can currently be pulled from it.
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the
+ *                                           per-slot unpullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on the per-slot pullable queue. It will be
+ * removed from the pullable queue before being added to the unpullable queue.
+ *
+ * This function should be used when a context has been pulled from, and there
+ * are no jobs remaining on the specified slot.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+		&kbdev->js_data.ctx_list_unpullable[js][kctx->priority]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable
+ *                                   or unpullable context queues
+ * @kbdev:  Device pointer
+ * @kctx:   Context to remove from queue
+ * @js:     Job slot to use
+ *
+ * The context must already be on one of the queues.
+ *
+ * This function should be used when a context has no jobs on the GPU, and no
+ * jobs remaining for the specified slot.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js)
+{
+	bool ret = false;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]));
+
+	list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+	if (kctx->slots_pullable == (1 << js)) {
+		kbdev->js_data.nr_contexts_pullable--;
+		ret = true;
+		if (!atomic_read(&kctx->atoms_pulled)) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+		}
+	}
+	kctx->slots_pullable &= ~(1 << js);
+
+	return ret;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head()
+ *                                     where the caller must hold
+ *                                     hwaccess_lock
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
+						struct kbase_device *kbdev,
+						int js)
+{
+	struct kbase_context *kctx;
+	int i;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+		if (list_empty(&kbdev->js_data.ctx_list_pullable[js][i]))
+			continue;
+
+		kctx = list_entry(kbdev->js_data.ctx_list_pullable[js][i].next,
+				struct kbase_context,
+				jctx.sched_info.ctx.ctx_list_entry[js]);
+
+		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+		return kctx;
+	}
+	return NULL;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable
+ *                              queue.
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head(
+		struct kbase_device *kbdev, int js)
+{
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return kctx;
+}
+
+/**
+ * kbase_js_ctx_pullable - Return if a context can be pulled from on the
+ *                         specified slot
+ * @kctx:          Context pointer
+ * @js:            Job slot to use
+ * @is_scheduled:  true if the context is currently scheduled
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * Return:         true if context can be pulled from on specified slot
+ *                 false otherwise
+ */
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
+					bool is_scheduled)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_jd_atom *katom;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	js_devdata = &kctx->kbdev->js_data;
+
+	if (is_scheduled) {
+		if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+			return false;
+	}
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return false; /* No pullable atoms */
+	if (kctx->blocked_js[js][katom->sched_priority])
+		return false;
+	if (atomic_read(&katom->blocked))
+		return false; /* next atom blocked */
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return false;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+			return false;
+	}
+
+	return true;
+}
+
+static bool kbase_js_dep_validate(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	bool ret = true;
+	bool has_dep = false, has_x_dep = false;
+	int js = kbase_js_get_slot(kbdev, katom);
+	int prio = katom->sched_priority;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+		if (dep_atom) {
+			int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+			int dep_prio = dep_atom->sched_priority;
+
+			/* Dependent atom must already have been submitted */
+			if (!(dep_atom->atom_flags &
+					KBASE_KATOM_FLAG_JSCTX_IN_TREE)) {
+				ret = false;
+				break;
+			}
+
+			/* Dependencies with different priorities can't
+			  be represented in the ringbuffer */
+			if (prio != dep_prio) {
+				ret = false;
+				break;
+			}
+
+			if (js == dep_js) {
+				/* Only one same-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * same-slot dependency */
+				if (dep_atom->post_dep) {
+					ret = false;
+					break;
+				}
+				has_dep = true;
+			} else {
+				/* Only one cross-slot dependency can be
+				 * represented in the ringbuffer */
+				if (has_x_dep) {
+					ret = false;
+					break;
+				}
+				/* Each dependee atom can only have one
+				 * cross-slot dependency */
+				if (dep_atom->x_post_dep) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already be in the
+				 * HW access ringbuffer */
+				if (dep_atom->gpu_rb_state !=
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+					ret = false;
+					break;
+				}
+				/* The dependee atom can not already have
+				 * completed */
+				if (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_IN_JS) {
+					ret = false;
+					break;
+				}
+				/* Cross-slot dependencies must not violate
+				 * PRLAM-8987 affinity restrictions */
+				if (kbase_hw_has_issue(kbdev,
+							BASE_HW_ISSUE_8987) &&
+						(js == 2 || dep_js == 2)) {
+					ret = false;
+					break;
+				}
+				has_x_dep = true;
+			}
+
+			/* Dependency can be represented in ringbuffers */
+		}
+	}
+
+	/* If dependencies can be represented by ringbuffer then clear them from
+	 * atom structure */
+	if (ret) {
+		for (i = 0; i < 2; i++) {
+			struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+			if (dep_atom) {
+				int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+
+				if ((js != dep_js) &&
+					(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_COMPLETED)
+					&& (dep_atom->status !=
+					KBASE_JD_ATOM_STATE_HW_COMPLETED)
+					&& (dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED)) {
+
+					katom->atom_flags |=
+						KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+					katom->x_pre_dep = dep_atom;
+					dep_atom->x_post_dep = katom;
+					if (kbase_jd_katom_dep_type(
+							&katom->dep[i]) ==
+							BASE_JD_DEP_TYPE_DATA)
+						katom->atom_flags |=
+						KBASE_KATOM_FLAG_FAIL_BLOCKER;
+				}
+				if ((kbase_jd_katom_dep_type(&katom->dep[i])
+						== BASE_JD_DEP_TYPE_DATA) &&
+						(js == dep_js)) {
+					katom->pre_dep = dep_atom;
+					dep_atom->post_dep = katom;
+				}
+
+				list_del(&katom->dep_item[i]);
+				kbase_jd_katom_dep_clear(&katom->dep[i]);
+			}
+		}
+	}
+
+	return ret;
+}
+
+void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int js;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Move kctx to the pullable/upullable list as per the new priority */
+	if (new_priority != kctx->priority) {
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kctx->slots_pullable & (1 << js))
+				list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_pullable[js][new_priority]);
+			else
+				list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+					&kbdev->js_data.ctx_list_unpullable[js][new_priority]);
+		}
+
+		kctx->priority = new_priority;
+	}
+}
+
+void kbase_js_update_ctx_priority(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int new_priority = KBASE_JS_ATOM_SCHED_PRIO_LOW;
+	int prio;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->js_ctx_scheduling_mode == KBASE_JS_SYSTEM_PRIORITY_MODE) {
+		/* Determine the new priority for context, as per the priority
+		 * of currently in-use atoms.
+		 */
+		for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+			if (kctx->atoms_count[prio]) {
+				new_priority = prio;
+				break;
+			}
+		}
+	}
+
+	kbase_js_set_ctx_priority(kctx, new_priority);
+}
+
+bool kbasep_js_add_job(struct kbase_context *kctx,
+		struct kbase_jd_atom *atom)
+{
+	unsigned long flags;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+	bool enqueue_required = false;
+	bool timer_sync = false;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/*
+	 * Begin Runpool transaction
+	 */
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	/* Refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX);
+	++(js_kctx_info->ctx.nr_jobs);
+
+	/* Lock for state available during IRQ */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (++kctx->atoms_count[atom->sched_priority] == 1)
+		kbase_js_update_ctx_priority(kctx);
+
+	if (!kbase_js_dep_validate(kctx, atom)) {
+		/* Dependencies could not be represented */
+		--(js_kctx_info->ctx.nr_jobs);
+
+		/* Setting atom status back to queued as it still has unresolved
+		 * dependencies */
+		atom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+		/* Undo the count, as the atom will get added again later but
+		 * leave the context priority adjusted or boosted, in case if
+		 * this was the first higher priority atom received for this
+		 * context.
+		 * This will prevent the scenario of priority inversion, where
+		 * another context having medium priority atoms keeps getting
+		 * scheduled over this context, which is having both lower and
+		 * higher priority atoms, but higher priority atoms are blocked
+		 * due to dependency on lower priority atoms. With priority
+		 * boost the high priority atom will get to run at earliest.
+		 */
+		kctx->atoms_count[atom->sched_priority]--;
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		goto out_unlock;
+	}
+
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_READY);
+
+	enqueue_required = kbase_js_dep_resolved_submit(kctx, atom);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc,
+				kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	/* Context Attribute Refcounting */
+	kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom);
+
+	if (enqueue_required) {
+		if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false))
+			timer_sync = kbase_js_ctx_list_add_pullable_nolock(
+					kbdev, kctx, atom->slot_nr);
+		else
+			timer_sync = kbase_js_ctx_list_add_unpullable_nolock(
+					kbdev, kctx, atom->slot_nr);
+	}
+	/* If this context is active and the atom is the first on its slot,
+	 * kick the job manager to attempt to fast-start the atom */
+	if (enqueue_required && kctx ==
+			kbdev->hwaccess.active_kctx[atom->slot_nr])
+		kbase_jm_try_kick(kbdev, 1 << atom->slot_nr);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+	/* End runpool transaction */
+
+	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+		if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+			/* A job got added while/after kbase_job_zap_context()
+			 * was called on a non-scheduled context. Kill that job
+			 * by killing the context. */
+			kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx,
+					false);
+		} else if (js_kctx_info->ctx.nr_jobs == 1) {
+			/* Handle Refcount going from 0 to 1: schedule the
+			 * context on the Queue */
+			KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+			dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx);
+
+			/* Queue was updated - caller must try to
+			 * schedule the head context */
+			WARN_ON(!enqueue_required);
+		}
+	}
+out_unlock:
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	return enqueue_required;
+}
+
+void kbasep_js_remove_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *atom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(atom != NULL);
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc,
+			kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	/* De-refcount ctx.nr_jobs */
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
+	--(js_kctx_info->ctx.nr_jobs);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (--kctx->atoms_count[atom->sched_priority] == 0)
+		kbase_js_update_ctx_priority(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	unsigned long flags;
+	struct kbasep_js_atom_retained_state katom_retained_state;
+	bool attr_state_changed;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+
+	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+	kbasep_js_remove_job(kbdev, kctx, katom);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* The atom has 'finished' (will not be re-run), so no need to call
+	 * kbasep_js_has_atom_finished().
+	 *
+	 * This is because it returns false for soft-stopped atoms, but we
+	 * want to override that, because we're cancelling an atom regardless of
+	 * whether it was soft-stopped or not */
+	attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx,
+			&katom_retained_state);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return attr_state_changed;
+}
+
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	unsigned long flags;
+	bool result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	return result;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev,
+		int as_nr)
+{
+	unsigned long flags;
+	struct kbase_context *found_kctx = NULL;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	found_kctx = kbdev->as_to_kctx[as_nr];
+
+	if (found_kctx != NULL)
+		kbase_ctx_sched_retain_ctx_refcount(found_kctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return found_kctx;
+}
+
+/**
+ * kbasep_js_run_jobs_after_ctx_and_atom_release - Try running more jobs after
+ *                           releasing a context and/or atom
+ * @kbdev:                   The kbase_device to operate on
+ * @kctx:                    The kbase_context to operate on
+ * @katom_retained_state:    Retained state from the atom
+ * @runpool_ctx_attr_change: True if the runpool context attributes have changed
+ *
+ * This collates a set of actions that must happen whilst hwaccess_lock is held.
+ *
+ * This includes running more jobs when:
+ * - The previously released kctx caused a ctx attribute change,
+ * - The released atom caused a ctx attribute change,
+ * - Slots were previously blocked due to affinity restrictions,
+ * - Submission during IRQ handling failed.
+ *
+ * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were
+ *         changed. The caller should try scheduling all contexts
+ */
+static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state,
+		bool runpool_ctx_attr_change)
+{
+	struct kbasep_js_device_data *js_devdata;
+	kbasep_js_release_result result = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(katom_retained_state != NULL);
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (js_devdata->nr_user_contexts_running != 0 && runpool_ctx_attr_change) {
+		/* A change in runpool ctx attributes might mean we can
+		 * run more jobs than before  */
+		result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+
+		KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB,
+					kctx, NULL, 0u, 0);
+	}
+	return result;
+}
+
+/**
+ * kbasep_js_runpool_release_ctx_internal - Internal function to release the reference
+ *                                          on a ctx and an atom's "retained state", only
+ *                                          taking the runpool and as transaction mutexes
+ * @kbdev:                   The kbase_device to operate on
+ * @kctx:                    The kbase_context to operate on
+ * @katom_retained_state:    Retained state from the atom
+ *
+ * This also starts more jobs running in the case of an ctx-attribute state change
+ *
+ * This does none of the followup actions for scheduling:
+ * - It does not schedule in a new context
+ * - It does not requeue or handle dying contexts
+ *
+ * For those tasks, just call kbasep_js_runpool_release_ctx() instead
+ *
+ * Has following requirements
+ * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr
+ * - Context has a non-zero refcount
+ * - Caller holds js_kctx_info->ctx.jsctx_mutex
+ * - Caller holds js_devdata->runpool_mutex
+ *
+ * Return: A bitpattern, containing KBASEP_JS_RELEASE_RESULT_* flags, indicating
+ *         the result of releasing a context that whether the caller should try
+ *         scheduling a new context or should try scheduling all contexts.
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+		struct kbase_device *kbdev,
+		struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	kbasep_js_release_result release_result = 0u;
+	bool runpool_ctx_attr_change = false;
+	int kctx_as_nr;
+	int new_ref_count;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	kctx_as_nr = kctx->as_nr;
+	KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0);
+
+	/*
+	 * Transaction begins on AS and runpool_irq
+	 *
+	 * Assert about out calling contract
+	 */
+	mutex_lock(&kbdev->pm.lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr);
+	KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0);
+
+	/* Update refcount */
+	kbase_ctx_sched_release_ctx(kctx);
+	new_ref_count = atomic_read(&kctx->refcount);
+
+	/* Release the atom if it finished (i.e. wasn't soft-stopped) */
+	if (kbasep_js_has_atom_finished(katom_retained_state))
+		runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom(
+				kbdev, kctx, katom_retained_state);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u,
+			new_ref_count);
+
+	if (new_ref_count == 2 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) &&
+			!kbase_pm_is_suspending(kbdev)) {
+		/* Context is kept scheduled into an address space even when
+		 * there are no jobs, in this case we have to handle the
+		 * situation where all jobs have been evicted from the GPU and
+		 * submission is disabled.
+		 *
+		 * At this point we re-enable submission to allow further jobs
+		 * to be executed
+		 */
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+	}
+
+	/* Make a set of checks to see if the context should be scheduled out.
+	 * Note that there'll always be at least 1 reference to the context
+	 * which was previously acquired by kbasep_js_schedule_ctx(). */
+	if (new_ref_count == 1 &&
+		(!kbasep_js_is_submit_allowed(js_devdata, kctx) ||
+							kbdev->pm.suspending)) {
+		int num_slots = kbdev->gpu_props.num_job_slots;
+		int slot;
+
+		/* Last reference, and we've been told to remove this context
+		 * from the Run Pool */
+		dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because refcount=%d, jobs=%d, allowed=%d",
+				kctx, new_ref_count, js_kctx_info->ctx.nr_jobs,
+				kbasep_js_is_submit_allowed(js_devdata, kctx));
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_mmu_as_released(kctx->as_nr);
+#endif
+		KBASE_TLSTREAM_TL_NRET_AS_CTX(&kbdev->as[kctx->as_nr], kctx);
+
+		kbase_backend_release_ctx_irq(kbdev, kctx);
+
+		for (slot = 0; slot < num_slots; slot++) {
+			if (kbdev->hwaccess.active_kctx[slot] == kctx)
+				kbdev->hwaccess.active_kctx[slot] = NULL;
+		}
+
+		/* Ctx Attribute handling
+		 *
+		 * Releasing atoms attributes must either happen before this, or
+		 * after the KCTX_SHEDULED flag is changed, otherwise we
+		 * double-decount the attributes
+		 */
+		runpool_ctx_attr_change |=
+			kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx);
+
+		/* Releasing the context and katom retained state can allow
+		 * more jobs to run */
+		release_result |=
+			kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev,
+						kctx, katom_retained_state,
+						runpool_ctx_attr_change);
+
+		/*
+		 * Transaction ends on AS and runpool_irq:
+		 *
+		 * By this point, the AS-related data is now clear and ready
+		 * for re-use.
+		 *
+		 * Since releases only occur once for each previous successful
+		 * retain, and no more retains are allowed on this context, no
+		 * other thread will be operating in this
+		 * code whilst we are
+		 */
+
+		/* Recalculate pullable status for all slots */
+		for (slot = 0; slot < num_slots; slot++) {
+			if (kbase_js_ctx_pullable(kctx, slot, false))
+				kbase_js_ctx_list_add_pullable_nolock(kbdev,
+						kctx, slot);
+		}
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		kbase_backend_release_ctx_noirq(kbdev, kctx);
+
+		mutex_unlock(&kbdev->pm.lock);
+
+		/* Note: Don't reuse kctx_as_nr now */
+
+		/* Synchronize with any timers */
+		kbase_backend_ctx_count_changed(kbdev);
+
+		/* update book-keeping info */
+		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+		/* Signal any waiter that the context is not scheduled, so is
+		 * safe for termination - once the jsctx_mutex is also dropped,
+		 * and jobs have finished. */
+		wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+		/* Queue an action to occur after we've dropped the lock */
+		release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED |
+			KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+	} else {
+		kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx,
+				katom_retained_state, runpool_ctx_attr_change);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->pm.lock);
+	}
+
+	return release_result;
+}
+
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	/* Setup a dummy katom_retained_state */
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+							&katom_retained_state);
+}
+
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx, bool has_pm_ref)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	/* This is called if and only if you've you've detached the context from
+	 * the Runpool Queue, and not added it back to the Runpool
+	 */
+	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		/* Dying: don't requeue, but kill all jobs on the context. This
+		 * happens asynchronously */
+		dev_dbg(kbdev->dev,
+			"JS: ** Killing Context %p on RunPool Remove **", kctx);
+		kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel);
+	}
+}
+
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(
+		struct kbase_device *kbdev, struct kbase_context *kctx,
+		struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL)
+		kbase_js_sched_all(kbdev);
+}
+
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_atom_retained_state katom_retained_state;
+
+	kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+			&katom_retained_state);
+}
+
+/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into
+ * kbase_js_sched_all() */
+static void kbasep_js_runpool_release_ctx_no_schedule(
+		struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	kbasep_js_release_result release_result;
+	struct kbasep_js_atom_retained_state katom_retained_state_struct;
+	struct kbasep_js_atom_retained_state *katom_retained_state =
+		&katom_retained_state_struct;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+	kbasep_js_atom_retained_state_init_invalid(katom_retained_state);
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+			katom_retained_state);
+
+	/* Drop the runpool mutex to allow requeing kctx */
+	mutex_unlock(&js_devdata->runpool_mutex);
+	if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+	/* Drop the jsctx_mutex to allow scheduling in a new context */
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* NOTE: could return release_result if the caller would like to know
+	 * whether it should schedule a new context, but currently no callers do
+	 */
+}
+
+void kbase_js_set_timeouts(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_backend_timeouts_changed(kbdev);
+}
+
+static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	unsigned long flags;
+	bool kctx_suspended = false;
+	int as_nr;
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* Pick available address space for this context */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	as_nr = kbase_ctx_sched_retain_ctx(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	if (as_nr == KBASEP_AS_NR_INVALID) {
+		as_nr = kbase_backend_find_and_release_free_address_space(
+				kbdev, kctx);
+		if (as_nr != KBASEP_AS_NR_INVALID) {
+			/* Attempt to retain the context again, this should
+			 * succeed */
+			mutex_lock(&kbdev->mmu_hw_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+			as_nr = kbase_ctx_sched_retain_ctx(kctx);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&kbdev->mmu_hw_mutex);
+
+			WARN_ON(as_nr == KBASEP_AS_NR_INVALID);
+		}
+	}
+	if (as_nr == KBASEP_AS_NR_INVALID)
+		return false; /* No address spaces currently available */
+
+	/*
+	 * Atomic transaction on the Context and Run Pool begins
+	 */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Check to see if context is dying due to kbase_job_zap_context() */
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		/* Roll back the transaction so far and return */
+		kbase_ctx_sched_release_ctx(kctx);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return false;
+	}
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL,
+				0u,
+				kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+	kbase_ctx_flag_set(kctx, KCTX_SCHEDULED);
+
+	/* Assign context to previously chosen address space */
+	if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) {
+		/* Roll back the transaction so far and return */
+		kbase_ctx_sched_release_ctx(kctx);
+		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+		return false;
+	}
+
+	kbdev->hwaccess.active_kctx[js] = kctx;
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_mmu_as_in_use(kctx->as_nr);
+#endif
+	KBASE_TLSTREAM_TL_RET_AS_CTX(&kbdev->as[kctx->as_nr], kctx);
+
+	/* Cause any future waiter-on-termination to wait until the context is
+	 * descheduled */
+	wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+	/* Re-check for suspending: a suspend could've occurred, and all the
+	 * contexts could've been removed from the runpool before we took this
+	 * lock. In this case, we don't want to allow this context to run jobs,
+	 * we just want it out immediately.
+	 *
+	 * The DMB required to read the suspend flag was issued recently as part
+	 * of the hwaccess_lock locking. If a suspend occurs *after* that lock
+	 * was taken (i.e. this condition doesn't execute), then the
+	 * kbasep_js_suspend() code will cleanup this context instead (by virtue
+	 * of it being called strictly after the suspend flag is set, and will
+	 * wait for this lock to drop) */
+	if (kbase_pm_is_suspending(kbdev)) {
+		/* Cause it to leave at some later point */
+		bool retained;
+
+		retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+		KBASE_DEBUG_ASSERT(retained);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+		kctx_suspended = true;
+	}
+
+	kbase_ctx_flag_clear(kctx, KCTX_PULLED_SINCE_ACTIVE_JS0 << js);
+
+	/* Transaction complete */
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	/* Synchronize with any timers */
+	kbase_backend_ctx_count_changed(kbdev);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	/* Note: after this point, the context could potentially get scheduled
+	 * out immediately */
+
+	if (kctx_suspended) {
+		/* Finishing forcing out the context due to a suspend. Use a
+		 * variant of kbasep_js_runpool_release_ctx() that doesn't
+		 * schedule a new context, to prevent a risk of recursion back
+		 * into this function */
+		kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx);
+		return false;
+	}
+	return true;
+}
+
+static bool kbase_js_use_ctx(struct kbase_device *kbdev,
+				struct kbase_context *kctx,
+				int js)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
+			kbase_backend_use_ctx_sched(kbdev, kctx, js)) {
+		/* Context already has ASID - mark as active */
+		if (kbdev->hwaccess.active_kctx[js] != kctx) {
+			kbdev->hwaccess.active_kctx[js] = kctx;
+			kbase_ctx_flag_clear(kctx,
+					KCTX_PULLED_SINCE_ACTIVE_JS0 << js);
+		}
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return true; /* Context already scheduled */
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	return kbasep_js_schedule_ctx(kbdev, kctx, js);
+}
+
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	bool is_scheduled;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* This must never be attempted whilst suspending - i.e. it should only
+	 * happen in response to a syscall from a user-space thread */
+	BUG_ON(kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Mark the context as privileged */
+	kbase_ctx_flag_set(kctx, KCTX_PRIVILEGED);
+
+	is_scheduled = kbase_ctx_flag(kctx, KCTX_SCHEDULED);
+	if (!is_scheduled) {
+		/* Add the context to the pullable list */
+		if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0))
+			kbase_js_sync_timers(kbdev);
+
+		/* Fast-starting requires the jsctx_mutex to be dropped,
+		 * because it works on multiple ctxs */
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+
+		/* Try to schedule the context in */
+		kbase_js_sched_all(kbdev);
+
+		/* Wait for the context to be scheduled in */
+		wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+			   kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	} else {
+		/* Already scheduled in - We need to retain it to keep the
+		 * corresponding address space */
+		kbasep_js_runpool_retain_ctx(kbdev, kctx);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+	}
+}
+KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx);
+
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* We don't need to use the address space anymore */
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	kbase_ctx_flag_clear(kctx, KCTX_PRIVILEGED);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+	/* Release the context - it will be scheduled out */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	kbase_js_sched_all(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx);
+
+void kbasep_js_suspend(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int i;
+	u16 retained = 0u;
+	int nr_privileged_ctx = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev));
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* Prevent all contexts from submitting */
+	js_devdata->runpool_irq.submit_allowed = 0;
+
+	/* Retain each of the contexts, so we can cause it to leave even if it
+	 * had no refcount to begin with */
+	for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) {
+		struct kbase_context *kctx = kbdev->as_to_kctx[i];
+
+		retained = retained << 1;
+
+		if (kctx && !(kbdev->as_free & (1u << i))) {
+			kbase_ctx_sched_retain_ctx_refcount(kctx);
+			retained |= 1u;
+			/* We can only cope with up to 1 privileged context -
+			 * the instrumented context. It'll be suspended by
+			 * disabling instrumentation */
+			if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+				++nr_privileged_ctx;
+				WARN_ON(nr_privileged_ctx != 1);
+			}
+		}
+	}
+	CSTD_UNUSED(nr_privileged_ctx);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* De-ref the previous retain to ensure each context gets pulled out
+	 * sometime later. */
+	for (i = 0;
+		 i < BASE_MAX_NR_AS;
+		 ++i, retained = retained >> 1) {
+		struct kbase_context *kctx = kbdev->as_to_kctx[i];
+
+		if (retained & 1u)
+			kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	/* Caller must wait for all Power Manager active references to be
+	 * dropped */
+}
+
+void kbasep_js_resume(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata;
+	int js, prio;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	mutex_lock(&js_devdata->queue_mutex);
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+		for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+			struct kbase_context *kctx, *n;
+			unsigned long flags;
+
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			list_for_each_entry_safe(kctx, n,
+				 &kbdev->js_data.ctx_list_unpullable[js][prio],
+				 jctx.sched_info.ctx.ctx_list_entry[js]) {
+				struct kbasep_js_kctx_info *js_kctx_info;
+				bool timer_sync = false;
+
+				/* Drop lock so we can take kctx mutexes */
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+
+				js_kctx_info = &kctx->jctx.sched_info;
+
+				mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+				mutex_lock(&js_devdata->runpool_mutex);
+				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+				if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
+					kbase_js_ctx_pullable(kctx, js, false))
+					timer_sync =
+						kbase_js_ctx_list_add_pullable_nolock(
+								kbdev, kctx, js);
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+				if (timer_sync)
+					kbase_backend_ctx_count_changed(kbdev);
+				mutex_unlock(&js_devdata->runpool_mutex);
+				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+				/* Take lock before accessing list again */
+				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+			}
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		}
+	}
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	/* Restart atom processing */
+	kbase_js_sched_all(kbdev);
+
+	/* JS Resume complete */
+}
+
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if ((katom->core_req & BASE_JD_REQ_FS) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE |
+								BASE_JD_REQ_T)))
+		return false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) &&
+	    (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) &&
+	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_T)))
+		return false;
+
+	return true;
+}
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom)
+{
+	if (katom->core_req & BASE_JD_REQ_FS)
+		return 0;
+
+	if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+		if (katom->device_nr == 1 &&
+				kbdev->gpu_props.num_core_groups == 2)
+			return 2;
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+			return 2;
+	}
+
+	return 1;
+}
+
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom)
+{
+	bool enqueue_required;
+
+	katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom);
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	/* If slot will transition from unpullable to pullable then add to
+	 * pullable list */
+	if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) {
+		enqueue_required = true;
+	} else {
+		enqueue_required = false;
+	}
+	if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) ||
+			(katom->pre_dep && (katom->pre_dep->atom_flags &
+			KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		int prio = katom->sched_priority;
+		int js = katom->slot_nr;
+		struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+		list_add_tail(&katom->queue, &queue->x_dep_head);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+		enqueue_required = false;
+	} else {
+		/* Check if there are lower priority jobs to soft stop */
+		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
+		/* Add atom to ring buffer. */
+		jsctx_tree_add(kctx, katom);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+	}
+
+	return enqueue_required;
+}
+
+/**
+ * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the
+ *                         runnable_tree, ready for execution
+ * @katom: Atom to submit
+ *
+ * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set,
+ * but is still present in the x_dep list. If @katom has a same-slot dependent
+ * atom then that atom (and any dependents) will also be moved.
+ */
+static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock);
+
+	while (katom) {
+		WARN_ON(!(katom->atom_flags &
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST));
+
+		if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+			list_del(&katom->queue);
+			katom->atom_flags &=
+					~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+			jsctx_tree_add(katom->kctx, katom);
+			katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+		} else {
+			break;
+		}
+
+		katom = katom->post_dep;
+	}
+}
+
+
+/**
+ * kbase_js_evict_deps - Evict dependencies of a failed atom.
+ * @kctx:       Context pointer
+ * @katom:      Pointer to the atom that has failed.
+ * @js:         The job slot the katom was run on.
+ * @prio:       Priority of the katom.
+ *
+ * Remove all post dependencies of an atom from the context ringbuffers.
+ *
+ * The original atom's event_code will be propogated to all dependent atoms.
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_js_evict_deps(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom, int js, int prio)
+{
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+	struct kbase_jd_atom *next_katom = katom->post_dep;
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (next_katom) {
+		KBASE_DEBUG_ASSERT(next_katom->status !=
+				KBASE_JD_ATOM_STATE_HW_COMPLETED);
+		next_katom->will_fail_event_code = katom->event_code;
+
+	}
+
+	/* Has cross slot depenency. */
+	if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE |
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		/* Remove dependency.*/
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+
+		/* Fail if it had a data dependency. */
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) {
+			x_dep->will_fail_event_code = katom->event_code;
+		}
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)
+			kbase_js_move_to_tree(x_dep);
+	}
+}
+
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
+{
+	struct kbase_jd_atom *katom;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_device *kbdev;
+	int pulled;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	kbdev = kctx->kbdev;
+
+	js_devdata = &kbdev->js_data;
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+		return NULL;
+	if (kbase_pm_is_suspending(kbdev))
+		return NULL;
+
+	katom = jsctx_rb_peek(kctx, js);
+	if (!katom)
+		return NULL;
+	if (kctx->blocked_js[js][katom->sched_priority])
+		return NULL;
+	if (atomic_read(&katom->blocked))
+		return NULL;
+
+	/* Due to ordering restrictions when unpulling atoms on failure, we do
+	 * not allow multiple runs of fail-dep atoms from the same context to be
+	 * present on the same slot */
+	if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) {
+		struct kbase_jd_atom *prev_atom =
+				kbase_backend_inspect_tail(kbdev, js);
+
+		if (prev_atom && prev_atom->kctx != kctx)
+			return NULL;
+	}
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+		if (katom->x_pre_dep->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
+			return NULL;
+		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+				kbase_backend_nr_atoms_on_slot(kbdev, js))
+			return NULL;
+	}
+
+	kbase_ctx_flag_set(kctx, KCTX_PULLED);
+	kbase_ctx_flag_set(kctx, (KCTX_PULLED_SINCE_ACTIVE_JS0 << js));
+
+	pulled = atomic_inc_return(&kctx->atoms_pulled);
+	if (pulled == 1 && !kctx->slots_pullable) {
+		WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+		kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+		atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+	}
+	atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]);
+	kctx->atoms_pulled_slot_pri[katom->slot_nr][katom->sched_priority]++;
+	jsctx_rb_pull(kctx, katom);
+
+	kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+	katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+
+	katom->ticks = 0;
+
+	return katom;
+}
+
+
+static void js_return_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+									work);
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	struct kbasep_js_atom_retained_state retained_state;
+	int js = katom->slot_nr;
+	int prio = katom->sched_priority;
+	bool timer_sync = false;
+	bool context_idle = false;
+	unsigned long flags;
+	base_jd_core_req core_req = katom->core_req;
+	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
+
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(katom);
+
+	kbase_backend_complete_wq(kbdev, katom);
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+	kbasep_js_atom_retained_state_copy(&retained_state, katom);
+
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+	atomic_dec(&kctx->atoms_pulled);
+	atomic_dec(&kctx->atoms_pulled_slot[js]);
+
+	atomic_dec(&katom->blocked);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	kctx->atoms_pulled_slot_pri[js][katom->sched_priority]--;
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[js]) &&
+			jsctx_rb_none_to_pull(kctx, js))
+		timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js);
+
+	/* If this slot has been blocked due to soft-stopped atoms, and all
+	 * atoms have now been processed, then unblock the slot */
+	if (!kctx->atoms_pulled_slot_pri[js][prio] &&
+			kctx->blocked_js[js][prio]) {
+		kctx->blocked_js[js][prio] = false;
+
+		/* Only mark the slot as pullable if the context is not idle -
+		 * that case is handled below */
+		if (atomic_read(&kctx->atoms_pulled) &&
+				kbase_js_ctx_pullable(kctx, js, true))
+			timer_sync |= kbase_js_ctx_list_add_pullable_nolock(
+					kbdev, kctx, js);
+	}
+
+	if (!atomic_read(&kctx->atoms_pulled)) {
+		if (!kctx->slots_pullable) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+
+		if (kctx->as_nr != KBASEP_AS_NR_INVALID &&
+				!kbase_ctx_flag(kctx, KCTX_DYING)) {
+			int num_slots = kbdev->gpu_props.num_job_slots;
+			int slot;
+
+			if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+				kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+			for (slot = 0; slot < num_slots; slot++) {
+				if (kbase_js_ctx_pullable(kctx, slot, true))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, slot);
+			}
+		}
+
+		kbase_jm_idle_ctx(kbdev, kctx);
+
+		context_idle = true;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (context_idle) {
+		WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+		kbase_pm_context_idle(kbdev);
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+							&retained_state);
+
+	kbase_js_sched_all(kbdev);
+
+	kbase_backend_complete_wq_post_sched(kbdev, core_req, coreref_state);
+}
+
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	jsctx_rb_unpull(kctx, katom);
+
+	WARN_ON(work_pending(&katom->work));
+
+	/* Block re-submission until workqueue has run */
+	atomic_inc(&katom->blocked);
+
+	kbase_job_check_leave_disjoint(kctx->kbdev, katom);
+
+	INIT_WORK(&katom->work, js_return_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+						struct kbase_jd_atom *katom)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	bool timer_sync = false;
+	int atom_slot;
+	bool context_idle = false;
+	int prio = katom->sched_priority;
+
+	kbdev = kctx->kbdev;
+	atom_slot = katom->slot_nr;
+
+	js_kctx_info = &kctx->jctx.sched_info;
+	js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) {
+		context_idle = !atomic_dec_return(&kctx->atoms_pulled);
+		atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
+		kctx->atoms_pulled_slot_pri[atom_slot][prio]--;
+
+		if (!atomic_read(&kctx->atoms_pulled) &&
+				!kctx->slots_pullable) {
+			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
+			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
+		}
+
+		/* If this slot has been blocked due to soft-stopped atoms, and
+		 * all atoms have now been processed, then unblock the slot */
+		if (!kctx->atoms_pulled_slot_pri[atom_slot][prio]
+				&& kctx->blocked_js[atom_slot][prio]) {
+			kctx->blocked_js[atom_slot][prio] = false;
+			if (kbase_js_ctx_pullable(kctx, atom_slot, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+						kbdev, kctx, atom_slot);
+		}
+	}
+	WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE));
+
+	if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) &&
+			jsctx_rb_none_to_pull(kctx, atom_slot)) {
+		if (!list_empty(
+			&kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot]))
+			timer_sync |= kbase_js_ctx_list_remove_nolock(
+					kctx->kbdev, kctx, atom_slot);
+	}
+
+	/*
+	 * If submission is disabled on this context (most likely due to an
+	 * atom failure) and there are now no atoms left in the system then
+	 * re-enable submission so that context can be scheduled again.
+	 */
+	if (!kbasep_js_is_submit_allowed(js_devdata, kctx) &&
+					!atomic_read(&kctx->atoms_pulled) &&
+					!kbase_ctx_flag(kctx, KCTX_DYING)) {
+		int js;
+
+		kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+		}
+	} else if (katom->x_post_dep &&
+			kbasep_js_is_submit_allowed(js_devdata, kctx)) {
+		int js;
+
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kbdev, kctx, js);
+		}
+	}
+
+	/* Mark context as inactive. The pm reference will be dropped later in
+	 * jd_done_worker().
+	 */
+	if (context_idle)
+		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	if (timer_sync)
+		kbase_backend_ctx_count_changed(kbdev);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return context_idle;
+}
+
+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp)
+{
+	struct kbase_device *kbdev;
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+
+	kbdev = kctx->kbdev;
+
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	if (katom->will_fail_event_code)
+		katom->event_code = katom->will_fail_event_code;
+
+	katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED;
+
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_js_evict_deps(kctx, katom, katom->slot_nr,
+				katom->sched_priority);
+	}
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP,
+				katom->slot_nr), NULL, 0);
+#endif
+
+	kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0);
+
+	/* Unblock cross dependency if present */
+	if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE ||
+			!(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) &&
+			(x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+		bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false);
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+		kbase_js_move_to_tree(x_dep);
+		if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false))
+			kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx,
+					x_dep->slot_nr);
+
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)
+			return x_dep;
+	}
+
+	return NULL;
+}
+
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *last_active[BASE_JM_MAX_NR_SLOTS];
+	bool timer_sync = false;
+	bool ctx_waiting[BASE_JM_MAX_NR_SLOTS];
+	int js;
+
+	js_devdata = &kbdev->js_data;
+
+	down(&js_devdata->schedule_sem);
+	mutex_lock(&js_devdata->queue_mutex);
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		last_active[js] = kbdev->hwaccess.active_kctx[js];
+		ctx_waiting[js] = false;
+	}
+
+	while (js_mask) {
+		js = ffs(js_mask) - 1;
+
+		while (1) {
+			struct kbase_context *kctx;
+			unsigned long flags;
+			bool context_idle = false;
+
+			kctx = kbase_js_ctx_list_pop_head(kbdev, js);
+
+			if (!kctx) {
+				js_mask &= ~(1 << js);
+				break; /* No contexts on pullable list */
+			}
+
+			if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) {
+				context_idle = true;
+
+				if (kbase_pm_context_active_handle_suspend(
+									kbdev,
+				      KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+					/* Suspend pending - return context to
+					 * queue and stop scheduling */
+					mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					if (kbase_js_ctx_list_add_pullable_head(
+						kctx->kbdev, kctx, js))
+						kbase_js_sync_timers(kbdev);
+					mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+					mutex_unlock(&js_devdata->queue_mutex);
+					up(&js_devdata->schedule_sem);
+					return;
+				}
+				kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
+			}
+
+			if (!kbase_js_use_ctx(kbdev, kctx, js)) {
+				mutex_lock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				/* Context can not be used at this time */
+				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+				if (kbase_js_ctx_pullable(kctx, js, false)
+				    || kbase_ctx_flag(kctx, KCTX_PRIVILEGED))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head_nolock(
+							kctx->kbdev, kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+							kctx->kbdev, kctx, js);
+				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+						flags);
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+				if (context_idle) {
+					WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+					kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+					kbase_pm_context_idle(kbdev);
+				}
+
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+				break;
+			}
+			mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+			kbase_ctx_flag_clear(kctx, KCTX_PULLED);
+
+			if (!kbase_jm_kick(kbdev, 1 << js))
+				/* No more jobs can be submitted on this slot */
+				js_mask &= ~(1 << js);
+
+			if (!kbase_ctx_flag(kctx, KCTX_PULLED)) {
+				bool pullable = kbase_js_ctx_pullable(kctx, js,
+						true);
+
+				/* Failed to pull jobs - push to head of list.
+				 * Unless this context is already 'active', in
+				 * which case it's effectively already scheduled
+				 * so push it to the back of the list. */
+				if (pullable && kctx == last_active[js] &&
+						kbase_ctx_flag(kctx,
+						(KCTX_PULLED_SINCE_ACTIVE_JS0 <<
+						js)))
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kctx->kbdev,
+							kctx, js);
+				else if (pullable)
+					timer_sync |=
+					kbase_js_ctx_list_add_pullable_head_nolock(
+							kctx->kbdev,
+							kctx, js);
+				else
+					timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+								kctx->kbdev,
+								kctx, js);
+
+				/* If this context is not the active context,
+				 * but the active context is pullable on this
+				 * slot, then we need to remove the active
+				 * marker to prevent it from submitting atoms in
+				 * the IRQ handler, which would prevent this
+				 * context from making progress. */
+				if (last_active[js] && kctx != last_active[js]
+						&& kbase_js_ctx_pullable(
+						last_active[js], js, true))
+					ctx_waiting[js] = true;
+
+				if (context_idle) {
+					kbase_jm_idle_ctx(kbdev, kctx);
+					spin_unlock_irqrestore(
+							&kbdev->hwaccess_lock,
+							flags);
+					WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
+					kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+					kbase_pm_context_idle(kbdev);
+				} else {
+					spin_unlock_irqrestore(
+							&kbdev->hwaccess_lock,
+							flags);
+				}
+				mutex_unlock(
+					&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+				js_mask &= ~(1 << js);
+				break; /* Could not run atoms on this slot */
+			}
+
+			/* Push to back of list */
+			if (kbase_js_ctx_pullable(kctx, js, true))
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
+							kctx->kbdev, kctx, js);
+			else
+				timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
+							kctx->kbdev, kctx, js);
+
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+		}
+	}
+
+	if (timer_sync)
+		kbase_js_sync_timers(kbdev);
+
+	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
+		if (kbdev->hwaccess.active_kctx[js] == last_active[js] &&
+				ctx_waiting[js])
+			kbdev->hwaccess.active_kctx[js] = NULL;
+	}
+
+	mutex_unlock(&js_devdata->queue_mutex);
+	up(&js_devdata->schedule_sem);
+}
+
+void kbase_js_zap_context(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+	int js;
+
+	/*
+	 * Critical assumption: No more submission is possible outside of the
+	 * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs)
+	 * whilst the struct kbase_context is terminating.
+	 */
+
+	/* First, atomically do the following:
+	 * - mark the context as dying
+	 * - try to evict it from the queue */
+	mutex_lock(&kctx->jctx.lock);
+	mutex_lock(&js_devdata->queue_mutex);
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	kbase_ctx_flag_set(kctx, KCTX_DYING);
+
+	dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx);
+
+	/*
+	 * At this point we know:
+	 * - If eviction succeeded, it was in the queue, but now no
+	 *   longer is
+	 *  - We must cancel the jobs here. No Power Manager active reference to
+	 *    release.
+	 *  - This happens asynchronously - kbase_jd_zap_context() will wait for
+	 *    those jobs to be killed.
+	 * - If eviction failed, then it wasn't in the queue. It is one
+	 *   of the following:
+	 *  - a. it didn't have any jobs, and so is not in the Queue or
+	 *       the Run Pool (not scheduled)
+	 *   - Hence, no more work required to cancel jobs. No Power Manager
+	 *     active reference to release.
+	 *  - b. it was in the middle of a scheduling transaction (and thus must
+	 *       have at least 1 job). This can happen from a syscall or a
+	 *       kernel thread. We still hold the jsctx_mutex, and so the thread
+	 *       must be waiting inside kbasep_js_try_schedule_head_ctx(),
+	 *       before checking whether the runpool is full. That thread will
+	 *       continue after we drop the mutex, and will notice the context
+	 *       is dying. It will rollback the transaction, killing all jobs at
+	 *       the same time. kbase_jd_zap_context() will wait for those jobs
+	 *       to be killed.
+	 *   - Hence, no more work required to cancel jobs, or to release the
+	 *     Power Manager active reference.
+	 *  - c. it is scheduled, and may or may not be running jobs
+	 * - We must cause it to leave the runpool by stopping it from
+	 * submitting any more jobs. When it finally does leave,
+	 * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs
+	 * (because it is dying), release the Power Manager active reference,
+	 * and will not requeue the context in the queue.
+	 * kbase_jd_zap_context() will wait for those jobs to be killed.
+	 *  - Hence, work required just to make it leave the runpool. Cancelling
+	 *    jobs and releasing the Power manager active reference will be
+	 *    handled when it leaves the runpool.
+	 */
+	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+			if (!list_empty(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+				list_del_init(
+				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+		}
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		/* The following events require us to kill off remaining jobs
+		 * and update PM book-keeping:
+		 * - we evicted it correctly (it must have jobs to be in the
+		 *   Queue)
+		 *
+		 * These events need no action, but take this path anyway:
+		 * - Case a: it didn't have any jobs, and was never in the Queue
+		 * - Case b: scheduling transaction will be partially rolled-
+		 *           back (this already cancels the jobs)
+		 */
+
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u,
+						kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx);
+
+		/* Only cancel jobs when we evicted from the
+		 * queue. No Power Manager active reference was held.
+		 *
+		 * Having is_dying set ensures that this kills, and
+		 * doesn't requeue */
+		kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false);
+
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
+	} else {
+		unsigned long flags;
+		bool was_retained;
+
+		/* Case c: didn't evict, but it is scheduled - it's in the Run
+		 * Pool */
+		KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u,
+						kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+		dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx);
+
+		/* Disable the ctx from submitting any more jobs */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+		/* Retain and (later) release the context whilst it is is now
+		 * disallowed from submitting jobs - ensures that someone
+		 * somewhere will be removing the context later on */
+		was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+		/* Since it's scheduled and we have the jsctx_mutex, it must be
+		 * retained successfully */
+		KBASE_DEBUG_ASSERT(was_retained);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx);
+
+		/* Cancel any remaining running jobs for this kctx - if any.
+		 * Submit is disallowed which takes effect immediately, so no
+		 * more new jobs will appear after we do this. */
+		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+			kbase_job_slot_hardstop(kctx, js, NULL);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
+
+		dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)",
+									kctx);
+
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+
+	KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u);
+
+	/* After this, you must wait on both the
+	 * kbase_jd_context::zero_jobs_wait and the
+	 * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs
+	 * to be destroyed, and the context to be de-scheduled (if it was on the
+	 * runpool).
+	 *
+	 * kbase_jd_zap_context() will do this. */
+}
+
+static inline int trace_get_refcnt(struct kbase_device *kbdev,
+					struct kbase_context *kctx)
+{
+	return atomic_read(&kctx->refcount);
+}
+
+/**
+ * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context
+ * @kctx:     Pointer to context.
+ * @callback: Pointer to function to call for each job.
+ *
+ * Call a function on all jobs belonging to a non-queued, non-running
+ * context, and detach the jobs from the context as it goes.
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * Atoms will be removed from the queue, so this must only be called when
+ * cancelling jobs (which occurs as part of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+		kbasep_js_ctx_job_cb callback)
+{
+	struct kbase_device *kbdev;
+	unsigned long flags;
+	u32 js;
+
+	kbdev = kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL,
+					0u, trace_get_refcnt(kbdev, kctx));
+
+	/* Invoke callback on jobs on each slot in turn */
+	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+		jsctx_queue_foreach(kctx, js, callback);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.h b/drivers/gpu/arm/midgard/mali_kbase_js.h
new file mode 100644
index 0000000000000..355da27edc1b9
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js.h
@@ -0,0 +1,912 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler APIs.
+ */
+
+#ifndef _KBASE_JS_H_
+#define _KBASE_JS_H_
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_context.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_debug.h"
+
+#include "mali_kbase_js_ctx_attr.h"
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js Job Scheduler Internal APIs
+ * @{
+ *
+ * These APIs are Internal to KBase.
+ */
+
+/**
+ * @brief Initialize the Job Scheduler
+ *
+ * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero
+ * initialized before passing to the kbasep_js_devdata_init() function. This is
+ * to give efficient error path code.
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev);
+
+/**
+ * @brief Halt the Job Scheduler.
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ *
+ */
+void kbasep_js_devdata_halt(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ */
+void kbasep_js_devdata_term(struct kbase_device *kbdev);
+
+/**
+ * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler.
+ *
+ * This effectively registers a struct kbase_context with a Job Scheduler.
+ *
+ * It does not register any jobs owned by the struct kbase_context with the scheduler.
+ * Those must be separately registered by kbasep_js_add_job().
+ *
+ * The struct kbase_context must be zero intitialized before passing to the
+ * kbase_js_init() function. This is to give efficient error path code.
+ */
+int kbasep_js_kctx_init(struct kbase_context * const kctx);
+
+/**
+ * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler
+ *
+ * This effectively de-registers a struct kbase_context from its Job Scheduler
+ *
+ * It is safe to call this on a struct kbase_context that has never had or failed
+ * initialization of its jctx.sched_info member, to give efficient error-path
+ * code.
+ *
+ * For this to work, the struct kbase_context must be zero intitialized before passing
+ * to the kbase_js_init() function.
+ *
+ * It is a Programming Error to call this whilst there are still jobs
+ * registered with this context.
+ */
+void kbasep_js_kctx_term(struct kbase_context *kctx);
+
+/**
+ * @brief Add a job chain to the Job Scheduler, and take necessary actions to
+ * schedule the context/run the job.
+ *
+ * This atomically does the following:
+ * - Update the numbers of jobs information
+ * - Add the job to the run pool if necessary (part of init_job)
+ *
+ * Once this is done, then an appropriate action is taken:
+ * - If the ctx is scheduled, it attempts to start the next job (which might be
+ * this added job)
+ * - Otherwise, and if this is the first job on the context, it enqueues it on
+ * the Policy Queue
+ *
+ * The Policy's Queue can be updated by this in the following ways:
+ * - In the above case that this is the first job on the context
+ * - If the context is high priority and the context is not scheduled, then it
+ * could cause the Policy to schedule out a low-priority context, allowing
+ * this context to be scheduled in.
+ *
+ * If the context is already scheduled on the RunPool, then adding a job to it
+ * is guarenteed not to update the Policy Queue. And so, the caller is
+ * guarenteed to not need to try scheduling a context from the Run Pool - it
+ * can safely assert that the result is false.
+ *
+ * It is a programming error to have more than U32_MAX jobs in flight at a time.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold hwaccess_lock (as this will be obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ *
+ * @return true indicates that the Policy Queue was updated, and so the
+ * caller will need to try scheduling a context onto the Run Pool.
+ * @return false indicates that no updates were made to the Policy Queue,
+ * so no further action is required from the caller. This is \b always returned
+ * when the context is currently scheduled.
+ */
+bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'.
+ *
+ * Completely removing a job requires several calls:
+ * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
+ *   the atom
+ * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler
+ * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the
+ *   remaining state held as part of the job having been run.
+ *
+ * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions.
+ *
+ * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool
+ *
+ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use
+ * kbasep_js_remove_cancelled_job() instead.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ *
+ */
+void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Completely remove a job chain from the Job Scheduler, in the case
+ * where the job chain was cancelled.
+ *
+ * This is a variant of kbasep_js_remove_job() that takes care of removing all
+ * of the retained state too. This is generally useful for cancelled atoms,
+ * which need not be handled in an optimal way.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ *  - it is not being killed with kbasep_jd_cancel()
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold the hwaccess_lock, (as this will be obtained
+ *   internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be
+ * obtained internally)
+ *
+ * @return true indicates that ctx attributes have changed and the caller
+ * should call kbase_js_sched_all() to try to run more jobs
+ * @return false otherwise
+ */
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						struct kbase_jd_atom *katom);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold mmu_hw_mutex and hwaccess_lock, because they will be
+ *   used internally.
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locks must be held by the caller:
+ * - mmu_hw_mutex, hwaccess_lock
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Lookup a context in the Run Pool based upon its current address space
+ * and ensure that is stays scheduled in.
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *   If the hwaccess_lock is already held, then the caller should use
+ *   kbasep_js_runpool_lookup_ctx_nolock() instead.
+ *
+ * @return a valid struct kbase_context on success, which has been refcounted as being busy.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr);
+
+/**
+ * @brief Handling the requeuing/killing of a context that was evicted from the
+ * policy queue or runpool.
+ *
+ * This should be used whenever handing off a context that has been evicted
+ * from the policy queue or the runpool:
+ * - If the context is not dying and has jobs, it gets re-added to the policy
+ * queue
+ * - Otherwise, it is not added
+ *
+ * In addition, if the context is dying the jobs are killed asynchronously.
+ *
+ * In all cases, the Power Manager active reference is released
+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true.  \a
+ * has_pm_ref must be set to false whenever the context was not previously in
+ * the runpool and does not hold a Power Manager active refcount. Note that
+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
+ * active refcount even though they weren't in the runpool.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ */
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref);
+
+/**
+ * @brief Release a refcount of a context being busy, allowing it to be
+ * scheduled out.
+ *
+ * When the refcount reaches zero and the context \em might be scheduled out
+ * (depending on whether the Scheudling Policy has deemed it so, or if it has run
+ * out of jobs).
+ *
+ * If the context does get scheduled out, then The following actions will be
+ * taken as part of deschduling a context:
+ * - For the context being descheduled:
+ *  - If the context is in the processing of dying (all the jobs are being
+ * removed from it), then descheduling also kills off any jobs remaining in the
+ * context.
+ *  - If the context is not dying, and any jobs remain after descheduling the
+ * context then it is re-enqueued to the Policy's Queue.
+ *  - Otherwise, the context is still known to the scheduler, but remains absent
+ * from the Policy Queue until a job is next added to it.
+ *  - In all descheduling cases, the Power Manager active reference (obtained
+ * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()).
+ *
+ * Whilst the context is being descheduled, this also handles actions that
+ * cause more atoms to be run:
+ * - Attempt submitting atoms when the Context Attributes on the Runpool have
+ * changed. This is because the context being scheduled out could mean that
+ * there are more opportunities to run atoms.
+ * - Attempt submitting to a slot that was previously blocked due to affinity
+ * restrictions. This is usually only necessary when releasing a context
+ * happens as part of completing a previous job, but is harmless nonetheless.
+ * - Attempt scheduling in a new context (if one is available), and if necessary,
+ * running a job from that new context.
+ *
+ * Unlike retaining a context in the runpool, this function \b cannot be called
+ * from IRQ context.
+ *
+ * It is a programming error to call this on a \a kctx that is not currently
+ * scheduled, or that already has a zero refcount.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional
+ * actions from completing an atom.
+ *
+ * This is usually called as part of completing an atom and releasing the
+ * refcount on the context held by the atom.
+ *
+ * Therefore, the extra actions carried out are part of handling actions queued
+ * on a completed atom, namely:
+ * - Releasing the atom's context attributes
+ * - Retrying the submission on a particular slot, because we couldn't submit
+ * on that slot from an IRQ handler.
+ *
+ * The locking conditions of this function are the same as those for
+ * kbasep_js_runpool_release_ctx()
+ */
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * @brief Variant of kbase_js_runpool_release_ctx() that assumes that
+ * kbasep_js_device_data::runpool_mutex and
+ * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not
+ * attempt to schedule new contexts.
+ */
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx);
+
+/**
+ * @brief Schedule in a privileged context
+ *
+ * This schedules a context in regardless of the context priority.
+ * If the runpool is full, a context will be forced out of the runpool and the function will wait
+ * for the new context to be scheduled in.
+ * The context will be kept scheduled in (and the corresponding address space reserved) until
+ * kbasep_js_release_privileged_ctx is called).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
+ * be used internally.
+ *
+ */
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Release a privileged context, allowing it to be scheduled out.
+ *
+ * See kbasep_js_runpool_release_ctx for potential side effects.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Try to submit the next job on each slot
+ *
+ * The following locks may be used:
+ * - kbasep_js_device_data::runpool_mutex
+ * - hwaccess_lock
+ */
+void kbase_js_try_run_jobs(struct kbase_device *kbdev);
+
+/**
+ * @brief Suspend the job scheduler during a Power Management Suspend event.
+ *
+ * Causes all contexts to be removed from the runpool, and prevents any
+ * contexts from (re)entering the runpool.
+ *
+ * This does not handle suspending the one privileged context: the caller must
+ * instead do this by by suspending the GPU HW Counter Instrumentation.
+ *
+ * This will eventually cause all Power Management active references held by
+ * contexts on the runpool to be released, without running any more atoms.
+ *
+ * The caller must then wait for all Power Mangement active refcount to become
+ * zero before completing the suspend.
+ *
+ * The emptying mechanism may take some time to complete, since it can wait for
+ * jobs to complete naturally instead of forcing them to end quickly. However,
+ * this is bounded by the Job Scheduler's Job Timeouts. Hence, this
+ * function is guaranteed to complete in a finite time.
+ */
+void kbasep_js_suspend(struct kbase_device *kbdev);
+
+/**
+ * @brief Resume the Job Scheduler after a Power Management Resume event.
+ *
+ * This restores the actions from kbasep_js_suspend():
+ * - Schedules contexts back into the runpool
+ * - Resumes running atoms on the GPU
+ */
+void kbasep_js_resume(struct kbase_device *kbdev);
+
+/**
+ * @brief Submit an atom to the job scheduler.
+ *
+ * The atom is enqueued on the context's ringbuffer. The caller must have
+ * ensured that all dependencies can be represented in the ringbuffer.
+ *
+ * Caller must hold jctx->lock
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to submit
+ *
+ * @return Whether the context requires to be enqueued. */
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom);
+
+/**
+  * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer.
+  * @kctx:  Context Pointer
+  * @prio:  Priority (specifies the queue together with js).
+  * @js:    Job slot (specifies the queue together with prio).
+  *
+  * Pushes all possible atoms from the linked list to the ringbuffer.
+  * Number of atoms are limited to free space in the ringbuffer and
+  * number of available atoms in the linked list.
+  *
+  */
+void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
+/**
+ * @brief Pull an atom from a context in the job scheduler for execution.
+ *
+ * The atom will not be removed from the ringbuffer at this stage.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context to pull from
+ * @param[in] js    Job slot to pull from
+ * @return          Pointer to an atom, or NULL if there are no atoms for this
+ *                  slot that can be currently run.
+ */
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
+
+/**
+ * @brief Return an atom to the job scheduler ringbuffer.
+ *
+ * An atom is 'unpulled' if execution is stopped but intended to be returned to
+ * later. The most common reason for this is that the atom has been
+ * soft-stopped.
+ *
+ * Note that if multiple atoms are to be 'unpulled', they must be returned in
+ * the reverse order to which they were originally pulled. It is a programming
+ * error to return atoms in any other order.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] atom  Pointer to the atom to unpull
+ */
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom from jd_done_worker(), removing it from the job
+ * scheduler ringbuffer.
+ *
+ * If the atom failed then all dependee atoms marked for failure propagation
+ * will also fail.
+ *
+ * @param[in] kctx  Context pointer
+ * @param[in] katom Pointer to the atom to complete
+ * @return true if the context is now idle (no jobs pulled)
+ *         false otherwise
+ */
+bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom.
+ *
+ * Most of the work required to complete an atom will be performed by
+ * jd_done_worker().
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] katom         Pointer to the atom to complete
+ * @param[in] end_timestamp The time that the atom completed (may be NULL)
+ *
+ * Return: Atom that has now been unblocked and can now be run, or NULL if none
+ */
+struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+		ktime_t *end_timestamp);
+
+/**
+ * @brief Submit atoms from all available contexts.
+ *
+ * This will attempt to submit as many jobs as possible to the provided job
+ * slots. It will exit when either all job slots are full, or all contexts have
+ * been used.
+ *
+ * @param[in] kbdev    Device pointer
+ * @param[in] js_mask  Mask of job slots to submit to
+ */
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
+
+/**
+ * kbase_jd_zap_context - Attempt to deschedule a context that is being
+ *                        destroyed
+ * @kctx: Context pointer
+ *
+ * This will attempt to remove a context from any internal job scheduler queues
+ * and perform any other actions to ensure a context will not be submitted
+ * from.
+ *
+ * If the context is currently scheduled, then the caller must wait for all
+ * pending jobs to complete before taking any further action.
+ */
+void kbase_js_zap_context(struct kbase_context *kctx);
+
+/**
+ * @brief Validate an atom
+ *
+ * This will determine whether the atom can be scheduled onto the GPU. Atoms
+ * with invalid combinations of core requirements will be rejected.
+ *
+ * @param[in] kbdev  Device pointer
+ * @param[in] katom  Atom to validate
+ * @return           true if atom is valid
+ *                   false otherwise
+ */
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
+
+/**
+ * kbase_js_set_timeouts - update all JS timeouts with user specified data
+ * @kbdev: Device pointer
+ *
+ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is
+ * set to a positive number then that becomes the new value used, if a timeout
+ * is negative then the default is set.
+ */
+void kbase_js_set_timeouts(struct kbase_device *kbdev);
+
+/**
+ * kbase_js_set_ctx_priority - set the context priority
+ * @kctx: Context pointer
+ * @new_priority: New priority value for the Context
+ *
+ * The context priority is set to a new value and it is moved to the
+ * pullable/unpullable list as per the new priority.
+ */
+void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority);
+
+
+/**
+ * kbase_js_update_ctx_priority - update the context priority
+ * @kctx: Context pointer
+ *
+ * The context priority gets updated as per the priority of atoms currently in
+ * use for that context, but only if system priority mode for context scheduling
+ * is being used.
+ */
+void kbase_js_update_ctx_priority(struct kbase_context *kctx);
+
+/*
+ * Helpers follow
+ */
+
+/**
+ * @brief Check that a context is allowed to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * As with any bool, never test the return value with true.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 test_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	test_bit = (u16) (1u << kctx->as_nr);
+
+	return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
+}
+
+/**
+ * @brief Allow a context to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 set_bit;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	set_bit = (u16) (1u << kctx->as_nr);
+
+	dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)",
+			kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed |= set_bit;
+}
+
+/**
+ * @brief Prevent a context from submitting more jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold hwaccess_lock.
+ */
+static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+	u16 clear_bit;
+	u16 clear_mask;
+
+	/* Ensure context really is scheduled in */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	clear_bit = (u16) (1u << kctx->as_nr);
+	clear_mask = ~clear_bit;
+
+	dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)",
+			kctx, kctx->as_nr);
+
+	js_devdata->runpool_irq.submit_allowed &= clear_mask;
+}
+
+/**
+ * Create an initial 'invalid' atom retained state, that requires no
+ * atom-related work to be done on releasing with
+ * kbasep_js_runpool_release_ctx_and_katom_retained_state()
+ */
+static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state)
+{
+	retained_state->event_code = BASE_JD_EVENT_NOT_STARTED;
+	retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID;
+}
+
+/**
+ * Copy atom state that can be made available after jd_done_nolock() is called
+ * on that atom.
+ */
+static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom)
+{
+	retained_state->event_code = katom->event_code;
+	retained_state->core_req = katom->core_req;
+	retained_state->sched_priority = katom->sched_priority;
+	retained_state->device_nr = katom->device_nr;
+}
+
+/**
+ * @brief Determine whether an atom has finished (given its retained state),
+ * and so should be given back to userspace/removed from the system.
+ *
+ * Reasons for an atom not finishing include:
+ * - Being soft-stopped (and so, the atom should be resubmitted sometime later)
+ *
+ * @param[in] katom_retained_state the retained state of the atom to check
+ * @return    false if the atom has not finished
+ * @return    !=false if the atom has finished
+ */
+static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT);
+}
+
+/**
+ * @brief Determine whether a struct kbasep_js_atom_retained_state is valid
+ *
+ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the
+ * code should just ignore it.
+ *
+ * @param[in] katom_retained_state the atom's retained state to check
+ * @return    false if the retained state is invalid, and can be ignored
+ * @return    !=false if the retained state is valid
+ */
+static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID);
+}
+
+/**
+ * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the
+ * context is guaranteed to be already previously retained.
+ *
+ * It is a programming error to supply the \a as_nr of a context that has not
+ * been previously retained/has a busy refcount of zero. The only exception is
+ * when there is no ctx in \a as_nr (NULL returned).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ *
+ * @return a valid struct kbase_context on success, with a refcount that is guaranteed
+ * to be non-zero and unmodified by this function.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_context *found_kctx;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+
+	found_kctx = kbdev->as_to_kctx[as_nr];
+	KBASE_DEBUG_ASSERT(found_kctx == NULL ||
+			atomic_read(&found_kctx->refcount) > 0);
+
+	return found_kctx;
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_inc_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
+	++(js_devdata->nr_all_contexts_running);
+
+	if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* Track contexts that can submit jobs */
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
+									S8_MAX);
+		++(js_devdata->nr_user_contexts_running);
+	}
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_dec_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	--(js_devdata->nr_all_contexts_running);
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
+
+	if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* Track contexts that can submit jobs */
+		--(js_devdata->nr_user_contexts_running);
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
+	}
+}
+
+
+/**
+ * @brief Submit atoms from all available contexts to all job slots.
+ *
+ * This will attempt to submit as many jobs as possible. It will exit when
+ * either all job slots are full, or all contexts have been used.
+ *
+ * @param[in] kbdev    Device pointer
+ */
+static inline void kbase_js_sched_all(struct kbase_device *kbdev)
+{
+	kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+extern const int
+kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS];
+
+extern const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+/**
+ * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio)
+ *                                        to relative ordering
+ * @atom_prio: Priority ID to translate.
+ *
+ * Atom priority values for @ref base_jd_prio cannot be compared directly to
+ * find out which are higher or lower.
+ *
+ * This function will convert base_jd_prio values for successively lower
+ * priorities into a monotonically increasing sequence. That is, the lower the
+ * base_jd_prio priority, the higher the value produced by this function. This
+ * is in accordance with how the rest of the kernel treates priority.
+ *
+ * The mapping is 1:1 and the size of the valid input range is the same as the
+ * size of the valid output range, i.e.
+ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS
+ *
+ * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions
+ *
+ * Return: On success: a value in the inclusive range
+ *         0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure:
+ *         KBASE_JS_ATOM_SCHED_PRIO_INVALID
+ */
+static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
+{
+	if (atom_prio >= BASE_JD_NR_PRIO_LEVELS)
+		return KBASE_JS_ATOM_SCHED_PRIO_INVALID;
+
+	return kbasep_js_atom_priority_to_relative[atom_prio];
+}
+
+static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
+{
+	unsigned int prio_idx;
+
+	KBASE_DEBUG_ASSERT(0 <= sched_prio
+			&& sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT);
+
+	prio_idx = (unsigned int)sched_prio;
+
+	return kbasep_js_relative_priority_to_atom[prio_idx];
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
new file mode 100644
index 0000000000000..6fd908aceb665
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
@@ -0,0 +1,306 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+
+/*
+ * Private functions follow
+ */
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, retain that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX);
+		++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) {
+			/* First refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, release that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0);
+		--(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) {
+			/* Last de-refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Retain a certain attribute on a ctx, also retaining it on the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX);
+
+	++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		/* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute);
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute);
+	}
+
+	return runpool_state_changed;
+}
+
+/*
+ * @brief Release a certain attribute on a ctx, also releasing it from the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0);
+
+	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		lockdep_assert_held(&kbdev->hwaccess_lock);
+		/* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute);
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute);
+	}
+
+	/* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */
+	--(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	return runpool_state_changed;
+}
+
+/*
+ * More commonly used public functions
+ */
+
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+		/* This context never submits, so don't track any scheduling attributes */
+		return;
+	}
+
+	/* Transfer attributes held in the context flags for contexts that have submit enabled */
+
+	/* ... More attributes can be added here ... */
+
+	/* The context should not have been scheduled yet, so ASSERT if this caused
+	 * runpool state changes (note that other threads *can't* affect the value
+	 * of runpool_state_changed, due to how it's calculated) */
+	KBASE_DEBUG_ASSERT(runpool_state_changed == false);
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed;
+	int i;
+
+	/* Retain any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled in, so update the runpool with the new attributes */
+			runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+
+			/* We don't need to know about state changed, because retaining a
+			 * context occurs on scheduling it, and that itself will also try
+			 * to run new atoms */
+			CSTD_UNUSED(runpool_state_changed);
+		}
+	}
+}
+
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	bool runpool_state_changed = false;
+	int i;
+
+	/* Release any existing attributes */
+	for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+		if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+			/* The context is being scheduled out, so update the runpool on the removed attributes */
+			runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom);
+	core_req = katom->core_req;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	/* We don't need to know about state changed, because retaining an
+	 * atom occurs on adding it, and that itself will also try to run
+	 * new atoms */
+	CSTD_UNUSED(runpool_state_changed);
+}
+
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+	bool runpool_state_changed = false;
+	base_jd_core_req core_req;
+
+	KBASE_DEBUG_ASSERT(katom_retained_state);
+	core_req = katom_retained_state->core_req;
+
+	/* No-op for invalid atoms */
+	if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false)
+		return false;
+
+	if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	else
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+	if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+		/* Atom that can run on slot1 or slot2, and can use all cores */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+	}
+
+	return runpool_state_changed;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
new file mode 100644
index 0000000000000..be781e60c822a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
@@ -0,0 +1,163 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_js_ctx_attr.h
+ * Job Scheduler Context Attribute APIs
+ */
+
+#ifndef _KBASE_JS_CTX_ATTR_H_
+#define _KBASE_JS_CTX_ATTR_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+
+/**
+ * Set the initial attributes of a context (when context create flags are set)
+ *
+ * Requires:
+ * - Hold the jsctx_mutex
+ */
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of a context
+ *
+ * This occurs on scheduling in the context on the runpool (but after
+ * is_scheduled is set)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ */
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Release all attributes of a context
+ *
+ * This occurs on scheduling out the context from the runpool (but before
+ * is_scheduled is cleared)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of an atom
+ *
+ * This occurs on adding an atom to a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ */
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * Release all attributes of an atom, given its retained state.
+ *
+ * This occurs after (permanently) removing an atom from a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * This is a no-op when \a katom_retained_state is invalid.
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+
+	return js_devdata->runpool_irq.ctx_attr_ref_count[attribute];
+}
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	/* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */
+	return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute);
+}
+
+/**
+ * Requires:
+ * - jsctx mutex
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* In general, attributes are 'on' when they have a refcount (which should never be < 0) */
+	return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+}
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
new file mode 100644
index 0000000000000..7385daa42e94f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
@@ -0,0 +1,415 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler Type Definitions
+ */
+
+#ifndef _KBASE_JS_DEFS_H_
+#define _KBASE_JS_DEFS_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+/* Forward decls */
+struct kbase_device;
+struct kbase_jd_atom;
+
+
+typedef u32 kbase_context_flags;
+
+struct kbasep_atom_req {
+	base_jd_core_req core_req;
+	kbase_context_flags ctx_req;
+	u32 device_nr;
+};
+
+/** Callback function run on all of a context's jobs registered with the Job
+ * Scheduler */
+typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Maximum number of jobs that can be submitted to a job slot whilst
+ * inside the IRQ handler.
+ *
+ * This is important because GPU NULL jobs can complete whilst the IRQ handler
+ * is running. Otherwise, it potentially allows an unlimited number of GPU NULL
+ * jobs to be submitted inside the IRQ handler, which increases IRQ latency.
+ */
+#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
+
+/**
+ * @brief Context attributes
+ *
+ * Each context attribute can be thought of as a boolean value that caches some
+ * state information about either the runpool, or the context:
+ * - In the case of the runpool, it is a cache of "Do any contexts owned by
+ * the runpool have attribute X?"
+ * - In the case of a context, it is a cache of "Do any atoms owned by the
+ * context have attribute X?"
+ *
+ * The boolean value of the context attributes often affect scheduling
+ * decisions, such as affinities to use and job slots to use.
+ *
+ * To accomodate changes of state in the context, each attribute is refcounted
+ * in the context, and in the runpool for all running contexts. Specifically:
+ * - The runpool holds a refcount of how many contexts in the runpool have this
+ * attribute.
+ * - The context holds a refcount of how many atoms have this attribute.
+ */
+enum kbasep_js_ctx_attr {
+	/** Attribute indicating a context that contains Compute jobs. That is,
+	 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE,
+
+	/** Attribute indicating a context that contains Non-Compute jobs. That is,
+	 * the context has some jobs that are \b not of type @ref
+	 * BASE_JD_REQ_ONLY_COMPUTE.
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_NON_COMPUTE,
+
+	/** Attribute indicating that a context contains compute-job atoms that
+	 * aren't restricted to a coherent group, and can run on all cores.
+	 *
+	 * Specifically, this is when the atom's \a core_req satisfy:
+	 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2
+	 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
+	 *
+	 * Such atoms could be blocked from running if one of the coherent groups
+	 * is being used by another job slot, so tracking this context attribute
+	 * allows us to prevent such situations.
+	 *
+	 * @note This doesn't take into account the 1-coregroup case, where all
+	 * compute atoms would effectively be able to run on 'all cores', but
+	 * contexts will still not always get marked with this attribute. Instead,
+	 * it is the caller's responsibility to take into account the number of
+	 * coregroups when interpreting this attribute.
+	 *
+	 * @note Whilst Tiler atoms are normally combined with
+	 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
+	 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
+	 * enough to handle anyway.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
+
+	/** Must be the last in the enum */
+	KBASEP_JS_CTX_ATTR_COUNT
+};
+
+enum {
+	/** Bit indicating that new atom should be started because this atom completed */
+	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
+	/** Bit indicating that the atom was evicted from the JS_NEXT registers */
+	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
+};
+
+/** Combination of KBASE_JS_ATOM_DONE_<...> bits */
+typedef u32 kbasep_js_atom_done_code;
+
+/*
+ * Context scheduling mode defines for kbase_device::js_ctx_scheduling_mode
+ */
+enum {
+	/*
+	 * In this mode, the context containing higher priority atoms will be
+	 * scheduled first and also the new runnable higher priority atoms can
+	 * preempt lower priority atoms currently running on the GPU, even if
+	 * they belong to a different context.
+	 */
+	KBASE_JS_SYSTEM_PRIORITY_MODE = 0,
+
+	/*
+	 * In this mode, the contexts are scheduled in round-robin fashion and
+	 * the new runnable higher priority atoms can preempt the lower priority
+	 * atoms currently running on the GPU, only if they belong to the same
+	 * context.
+	 */
+	KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE,
+
+	/* Must be the last in the enum */
+	KBASE_JS_PRIORITY_MODE_COUNT,
+};
+
+/*
+ * Internal atom priority defines for kbase_jd_atom::sched_prio
+ */
+enum {
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
+	KBASE_JS_ATOM_SCHED_PRIO_MED,
+	KBASE_JS_ATOM_SCHED_PRIO_LOW,
+	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
+};
+
+/* Invalid priority for kbase_jd_atom::sched_prio */
+#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1
+
+/* Default priority in the case of contexts with no atoms, or being lenient
+ * about invalid priorities from userspace.
+ */
+#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
+
+/**
+ * @brief KBase Device Data Job Scheduler sub-structure
+ *
+ * This encapsulates the current context of the Job Scheduler on a particular
+ * device. This context is global to the device, and is not tied to any
+ * particular struct kbase_context running on the device.
+ *
+ * nr_contexts_running and as_free are optimized for packing together (by making
+ * them smaller types than u32). The operations on them should rarely involve
+ * masking. The use of signed types for arithmetic indicates to the compiler that
+ * the value will not rollover (which would be undefined behavior), and so under
+ * the Total License model, it is free to make optimizations based on that (i.e.
+ * to remove masking).
+ */
+struct kbasep_js_device_data {
+	/* Sub-structure to collect together Job Scheduling data used in IRQ
+	 * context. The hwaccess_lock must be held when accessing. */
+	struct runpool_irq {
+		/** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
+		 * When bit 'N' is set in this, it indicates whether the context bound to address space
+		 * 'N' is allowed to submit jobs.
+		 */
+		u16 submit_allowed;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of contexts
+		 * that can fit into the runpool. This is currently BASE_MAX_NR_AS
+		 *
+		 * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
+		 * the refcount. Hence, it's not worthwhile reducing this to
+		 * bit-manipulation on u32s to save space (where in contrast, 4 bit
+		 * sub-fields would be easy to do and would save space).
+		 *
+		 * Whilst this must not become negative, the sign bit is used for:
+		 * - error detection in debug builds
+		 * - Optimization: it is undefined for a signed int to overflow, and so
+		 * the compiler can optimize for that never happening (thus, no masking
+		 * is required on updating the variable) */
+		s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/*
+		 * Affinity management and tracking
+		 */
+		/** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
+		 * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */
+		u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
+		/** Refcount for each core owned by each slot. Used to generate the
+		 * slot_affinities array of bitvectors
+		 *
+		 * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
+		 * because it is refcounted only when a job is definitely about to be
+		 * submitted to a slot, and is de-refcounted immediately after a job
+		 * finishes */
+		s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
+	} runpool_irq;
+
+	/**
+	 * Run Pool mutex, for managing contexts within the runpool.
+	 * Unless otherwise specified, you must hold this lock whilst accessing any
+	 * members that follow
+	 *
+	 * In addition, this is used to access:
+	 * - the kbasep_js_kctx_info::runpool substructure
+	 */
+	struct mutex runpool_mutex;
+
+	/**
+	 * Queue Lock, used to access the Policy's queue of contexts independently
+	 * of the Run Pool.
+	 *
+	 * Of course, you don't need the Run Pool lock to access this.
+	 */
+	struct mutex queue_mutex;
+
+	/**
+	 * Scheduling semaphore. This must be held when calling
+	 * kbase_jm_kick()
+	 */
+	struct semaphore schedule_sem;
+
+	/**
+	 * List of contexts that can currently be pulled from
+	 */
+	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+	/**
+	 * List of contexts that can not currently be pulled from, but have
+	 * jobs currently running.
+	 */
+	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+	/** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
+	s8 nr_user_contexts_running;
+	/** Number of currently scheduled contexts (including ones that are not submitting jobs) */
+	s8 nr_all_contexts_running;
+
+	/** Core Requirements to match up with base_js_atom's core_req memeber
+	 * @note This is a write-once member, and so no locking is required to read */
+	base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];
+
+	u32 scheduling_period_ns;    /*< Value for JS_SCHEDULING_PERIOD_NS */
+	u32 soft_stop_ticks;	     /*< Value for JS_SOFT_STOP_TICKS */
+	u32 soft_stop_ticks_cl;	     /*< Value for JS_SOFT_STOP_TICKS_CL */
+	u32 hard_stop_ticks_ss;	     /*< Value for JS_HARD_STOP_TICKS_SS */
+	u32 hard_stop_ticks_cl;	     /*< Value for JS_HARD_STOP_TICKS_CL */
+	u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */
+	u32 gpu_reset_ticks_ss;	     /*< Value for JS_RESET_TICKS_SS */
+	u32 gpu_reset_ticks_cl;	     /*< Value for JS_RESET_TICKS_CL */
+	u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */
+	u32 ctx_timeslice_ns;		 /**< Value for JS_CTX_TIMESLICE_NS */
+
+	/**< Value for JS_SOFT_JOB_TIMEOUT */
+	atomic_t soft_job_timeout_ms;
+
+	/** List of suspended soft jobs */
+	struct list_head suspended_soft_jobs_list;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* Support soft-stop on a single context */
+	bool softstop_always;
+#endif				/* CONFIG_MALI_DEBUG */
+
+	/** The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths).
+	 * @note This is a write-once member, and so no locking is required to read */
+	int init_status;
+
+	/* Number of contexts that can currently be pulled from */
+	u32 nr_contexts_pullable;
+
+	/* Number of contexts that can either be pulled from or are currently
+	 * running */
+	atomic_t nr_contexts_runnable;
+};
+
+/**
+ * @brief KBase Context Job Scheduling information structure
+ *
+ * This is a substructure in the struct kbase_context that encapsulates all the
+ * scheduling information.
+ */
+struct kbasep_js_kctx_info {
+
+	/**
+	 * Job Scheduler Context information sub-structure. These members are
+	 * accessed regardless of whether the context is:
+	 * - In the Policy's Run Pool
+	 * - In the Policy's Queue
+	 * - Not queued nor in the Run Pool.
+	 *
+	 * You must obtain the jsctx_mutex before accessing any other members of
+	 * this substructure.
+	 *
+	 * You may not access any of these members from IRQ context.
+	 */
+	struct kbase_jsctx {
+		struct mutex jsctx_mutex;		    /**< Job Scheduler Context lock */
+
+		/** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
+		 * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr
+		 * for such jobs*/
+		u32 nr_jobs;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of atoms on
+		 * the context. **/
+		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		/**
+		 * Wait queue to wait for KCTX_SHEDULED flag state changes.
+		 * */
+		wait_queue_head_t is_scheduled_wait;
+
+		/** Link implementing JS queues. Context can be present on one
+		 * list per job slot
+		 */
+		struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
+	} ctx;
+
+	/* The initalized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths) */
+	int init_status;
+};
+
+/** Subset of atom state that can be available after jd_done_nolock() is called
+ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
+ * because the original atom could disappear. */
+struct kbasep_js_atom_retained_state {
+	/** Event code - to determine whether the atom has finished */
+	enum base_jd_event_code event_code;
+	/** core requirements */
+	base_jd_core_req core_req;
+	/* priority */
+	int sched_priority;
+	/* Core group atom was executed on */
+	u32 device_nr;
+
+};
+
+/**
+ * Value signifying 'no retry on a slot required' for:
+ * - kbase_js_atom_retained_state::retry_submit_on_slot
+ * - kbase_jd_atom::retry_submit_on_slot
+ */
+#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)
+
+/**
+ * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state.
+ *
+ * @see kbase_atom_retained_state_is_valid()
+ */
+#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP
+
+/**
+ * @brief The JS timer resolution, in microseconds
+ *
+ * Any non-zero difference in time will be at least this size.
+ */
+#define KBASEP_JS_TICK_RESOLUTION_US 1
+
+
+	  /** @} *//* end group kbase_js */
+	  /** @} *//* end group base_kbase_api */
+	  /** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_linux.h b/drivers/gpu/arm/midgard/mali_kbase_linux.h
new file mode 100644
index 0000000000000..003ac9e68a76c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_linux.h
@@ -0,0 +1,48 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_linux.h
+ * Base kernel APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_LINUX_H_
+#define _KBASE_LINUX_H_
+
+/* All things that are needed for the Linux port. */
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+
+#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API))
+	#define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func)
+#else
+	#define KBASE_EXPORT_TEST_API(func)
+#endif
+
+#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func)
+
+#endif /* _KBASE_LINUX_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c
new file mode 100644
index 0000000000000..3eff83a4f0cef
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -0,0 +1,3698 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem.c
+ * Base kernel memory APIs
+ */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/compat.h>
+#include <linux/version.h>
+#include <linux/log2.h>
+
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_cache_policy.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_tlstream.h>
+
+/* Forward declarations */
+static void free_partial_locked(struct kbase_context *kctx,
+		struct kbase_mem_pool *pool, struct tagged_addr tp);
+
+static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx)
+{
+#if defined(CONFIG_ARM64)
+	/* VA_BITS can be as high as 48 bits, but all bits are available for
+	 * both user and kernel.
+	 */
+	size_t cpu_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	/* x86_64 can access 48 bits of VA, but the 48th is used to denote
+	 * kernel (1) vs userspace (0), so the max here is 47.
+	 */
+	size_t cpu_va_bits = 47;
+#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32)
+	size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE;
+#else
+#error "Unknown CPU VA width for this architecture"
+#endif
+
+#ifdef CONFIG_64BIT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		cpu_va_bits = 32;
+#endif
+
+	return cpu_va_bits;
+}
+
+/* This function finds out which RB tree the given pfn from the GPU VA belongs
+ * to based on the memory zone the pfn refers to */
+static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
+								    u64 gpu_pfn)
+{
+	struct rb_root *rbtree = NULL;
+
+#ifdef CONFIG_64BIT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+#endif /* CONFIG_64BIT */
+		if (gpu_pfn >= KBASE_REG_ZONE_CUSTOM_VA_BASE)
+			rbtree = &kctx->reg_rbtree_custom;
+		else
+			rbtree = &kctx->reg_rbtree_same;
+#ifdef CONFIG_64BIT
+	} else {
+		if (gpu_pfn >= kctx->same_va_end)
+			rbtree = &kctx->reg_rbtree_custom;
+		else
+			rbtree = &kctx->reg_rbtree_same;
+	}
+#endif /* CONFIG_64BIT */
+
+	return rbtree;
+}
+
+/* This function inserts a region into the tree. */
+static void kbase_region_tracker_insert(struct kbase_va_region *new_reg)
+{
+	u64 start_pfn = new_reg->start_pfn;
+	struct rb_node **link = NULL;
+	struct rb_node *parent = NULL;
+	struct rb_root *rbtree = NULL;
+
+	rbtree = new_reg->rbtree;
+
+	link = &(rbtree->rb_node);
+	/* Find the right place in the tree using tree search */
+	while (*link) {
+		struct kbase_va_region *old_reg;
+
+		parent = *link;
+		old_reg = rb_entry(parent, struct kbase_va_region, rblink);
+
+		/* RBTree requires no duplicate entries. */
+		KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn);
+
+		if (old_reg->start_pfn > start_pfn)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	/* Put the new node there, and rebalance tree */
+	rb_link_node(&(new_reg->rblink), parent, link);
+
+	rb_insert_color(&(new_reg->rblink), rbtree);
+}
+
+static struct kbase_va_region *find_region_enclosing_range_rbtree(
+		struct rb_root *rbtree, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+	u64 end_pfn = start_pfn + nr_pages;
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (start_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (end_pfn > tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+struct kbase_va_region *kbase_find_region_enclosing_address(
+		struct rb_root *rbtree, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (gpu_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (gpu_pfn >= tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+/* Find region enclosing given address. */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_root *rbtree = NULL;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+
+	return kbase_find_region_enclosing_address(rbtree, gpu_addr);
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
+
+struct kbase_va_region *kbase_find_region_base_address(
+		struct rb_root *rbtree, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if (reg->start_pfn > gpu_pfn)
+			rbnode = rbnode->rb_left;
+		else if (reg->start_pfn < gpu_pfn)
+			rbnode = rbnode->rb_right;
+		else
+			return reg;
+
+	}
+
+	return NULL;
+}
+
+/* Find region with given base address */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_root *rbtree = NULL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+
+	return kbase_find_region_base_address(rbtree, gpu_addr);
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
+
+/* Find region meeting given requirements */
+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
+		struct kbase_va_region *reg_reqs,
+		size_t nr_pages, size_t align_offset, size_t align_mask,
+		u64 *out_start_pfn)
+{
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+	struct rb_root *rbtree = NULL;
+
+	/* Note that this search is a linear search, as we do not have a target
+	   address in mind, so does not benefit from the rbtree search */
+	rbtree = reg_reqs->rbtree;
+
+	for (rbnode = rb_first(rbtree); rbnode; rbnode = rb_next(rbnode)) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if ((reg->nr_pages >= nr_pages) &&
+				(reg->flags & KBASE_REG_FREE)) {
+			/* Check alignment */
+			u64 start_pfn = reg->start_pfn;
+
+			/* When align_offset == align, this sequence is
+			 * equivalent to:
+			 *   (start_pfn + align_mask) & ~(align_mask)
+			 *
+			 * Otherwise, it aligns to n*align + offset, for the
+			 * lowest value n that makes this still >start_pfn */
+			start_pfn += align_mask;
+			start_pfn -= (start_pfn - align_offset) & (align_mask);
+
+			if (!(reg_reqs->flags & KBASE_REG_GPU_NX)) {
+				/* Can't end at 4GB boundary */
+				if (0 == ((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB))
+					start_pfn += align_offset;
+
+				/* Can't start at 4GB boundary */
+				if (0 == (start_pfn & BASE_MEM_PFN_MASK_4GB))
+					start_pfn += align_offset;
+
+				if (!((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB) ||
+				    !(start_pfn & BASE_MEM_PFN_MASK_4GB))
+					continue;
+			} else if (reg_reqs->flags &
+					KBASE_REG_GPU_VA_SAME_4GB_PAGE) {
+				u64 end_pfn = start_pfn + nr_pages - 1;
+
+				if ((start_pfn & ~BASE_MEM_PFN_MASK_4GB) !=
+				    (end_pfn & ~BASE_MEM_PFN_MASK_4GB))
+					start_pfn = end_pfn & ~BASE_MEM_PFN_MASK_4GB;
+			}
+
+			if ((start_pfn >= reg->start_pfn) &&
+					(start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
+					((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) {
+				*out_start_pfn = start_pfn;
+				return reg;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * @brief Remove a region object from the global list.
+ *
+ * The region reg is removed, possibly by merging with other free and
+ * compatible adjacent regions.  It must be called with the context
+ * region lock held. The associated memory is not released (see
+ * kbase_free_alloced_region). Internal use only.
+ */
+int kbase_remove_va_region(struct kbase_va_region *reg)
+{
+	struct rb_node *rbprev;
+	struct kbase_va_region *prev = NULL;
+	struct rb_node *rbnext;
+	struct kbase_va_region *next = NULL;
+	struct rb_root *reg_rbtree = NULL;
+
+	int merged_front = 0;
+	int merged_back = 0;
+	int err = 0;
+
+	reg_rbtree = reg->rbtree;
+
+	/* Try to merge with the previous block first */
+	rbprev = rb_prev(&(reg->rblink));
+	if (rbprev) {
+		prev = rb_entry(rbprev, struct kbase_va_region, rblink);
+		if (prev->flags & KBASE_REG_FREE) {
+			/* We're compatible with the previous VMA,
+			 * merge with it */
+			WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) !=
+					    (reg->flags & KBASE_REG_ZONE_MASK));
+			prev->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), reg_rbtree);
+			reg = prev;
+			merged_front = 1;
+		}
+	}
+
+	/* Try to merge with the next block second */
+	/* Note we do the lookup here as the tree may have been rebalanced. */
+	rbnext = rb_next(&(reg->rblink));
+	if (rbnext) {
+		/* We're compatible with the next VMA, merge with it */
+		next = rb_entry(rbnext, struct kbase_va_region, rblink);
+		if (next->flags & KBASE_REG_FREE) {
+			WARN_ON((next->flags & KBASE_REG_ZONE_MASK) !=
+					    (reg->flags & KBASE_REG_ZONE_MASK));
+			next->start_pfn = reg->start_pfn;
+			next->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), reg_rbtree);
+			merged_back = 1;
+			if (merged_front) {
+				/* We already merged with prev, free it */
+				kbase_free_alloced_region(reg);
+			}
+		}
+	}
+
+	/* If we failed to merge then we need to add a new block */
+	if (!(merged_front || merged_back)) {
+		/*
+		 * We didn't merge anything. Add a new free
+		 * placeholder and remove the original one.
+		 */
+		struct kbase_va_region *free_reg;
+
+		free_reg = kbase_alloc_free_region(reg_rbtree,
+				reg->start_pfn, reg->nr_pages,
+				reg->flags & KBASE_REG_ZONE_MASK);
+		if (!free_reg) {
+			err = -ENOMEM;
+			goto out;
+		}
+		rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree);
+	}
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_remove_va_region);
+
+/**
+ * kbase_insert_va_region_nolock - Insert a VA region to the list,
+ * replacing the existing one.
+ *
+ * @new_reg: The new region to insert
+ * @at_reg: The region to replace
+ * @start_pfn: The Page Frame Number to insert at
+ * @nr_pages: The number of pages of the region
+ */
+static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
+		struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_root *reg_rbtree = NULL;
+	int err = 0;
+
+	reg_rbtree = at_reg->rbtree;
+
+	/* Must be a free region */
+	KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
+	/* start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
+	/* at least nr_pages from start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	/* Regions are a whole use, so swap and delete old one. */
+	if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) {
+		rb_replace_node(&(at_reg->rblink), &(new_reg->rblink),
+								reg_rbtree);
+		kbase_free_alloced_region(at_reg);
+	}
+	/* New region replaces the start of the old one, so insert before. */
+	else if (at_reg->start_pfn == start_pfn) {
+		at_reg->start_pfn += nr_pages;
+		KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(new_reg);
+	}
+	/* New region replaces the end of the old one, so insert after. */
+	else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(new_reg);
+	}
+	/* New region splits the old one, so insert and create new */
+	else {
+		struct kbase_va_region *new_front_reg;
+
+		new_front_reg = kbase_alloc_free_region(reg_rbtree,
+				at_reg->start_pfn,
+				start_pfn - at_reg->start_pfn,
+				at_reg->flags & KBASE_REG_ZONE_MASK);
+
+		if (new_front_reg) {
+			at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
+			at_reg->start_pfn = start_pfn + nr_pages;
+
+			kbase_region_tracker_insert(new_front_reg);
+			kbase_region_tracker_insert(new_reg);
+		} else {
+			err = -ENOMEM;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * kbase_add_va_region - Add a VA region to the region list for a context.
+ *
+ * @kctx: kbase context containing the region
+ * @reg: the region to add
+ * @addr: the address to insert the region at
+ * @nr_pages: the number of pages in the region
+ * @align: the minimum alignment in pages
+ */
+int kbase_add_va_region(struct kbase_context *kctx,
+		struct kbase_va_region *reg, u64 addr,
+		size_t nr_pages, size_t align)
+{
+	int err = 0;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int cpu_va_bits = kbase_get_num_cpu_va_bits(kctx);
+	int gpu_pc_bits =
+		kbdev->gpu_props.props.core_props.log2_program_counter_size;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* The executable allocation from the SAME_VA zone would already have an
+	 * appropriately aligned GPU VA chosen for it.
+	 */
+	if (!(reg->flags & KBASE_REG_GPU_NX) && !addr) {
+		if (cpu_va_bits > gpu_pc_bits) {
+			align = max(align, (size_t)((1ULL << gpu_pc_bits)
+						>> PAGE_SHIFT));
+		}
+	}
+
+	do {
+		err = kbase_add_va_region_rbtree(kbdev, reg, addr, nr_pages,
+				align);
+		if (err != -ENOMEM)
+			break;
+
+		/*
+		 * If the allocation is not from the same zone as JIT
+		 * then don't retry, we're out of VA and there is
+		 * nothing which can be done about it.
+		 */
+		if ((reg->flags & KBASE_REG_ZONE_MASK) !=
+				KBASE_REG_ZONE_CUSTOM_VA)
+			break;
+	} while (kbase_jit_evict(kctx));
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_add_va_region);
+
+/**
+ * kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree
+ *
+ * Insert a region into the rbtree that was specified when the region was
+ * created. If addr is 0 a free area in the rbtree is used, otherwise the
+ * specified address is used.
+ *
+ * @kbdev: The kbase device
+ * @reg: The region to add
+ * @addr: The address to add the region at, or 0 to map at any available address
+ * @nr_pages: The size of the region in pages
+ * @align: The minimum alignment in pages
+ */
+int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
+		struct kbase_va_region *reg,
+		u64 addr, size_t nr_pages, size_t align)
+{
+	struct rb_root *rbtree = NULL;
+	struct kbase_va_region *tmp;
+	u64 gpu_pfn = addr >> PAGE_SHIFT;
+	int err = 0;
+
+	rbtree = reg->rbtree;
+
+	if (!align)
+		align = 1;
+
+	/* must be a power of 2 */
+	KBASE_DEBUG_ASSERT(is_power_of_2(align));
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+
+	/* Path 1: Map a specific address. Find the enclosing region,
+	 * which *must* be free.
+	 */
+	if (gpu_pfn) {
+		struct device *dev = kbdev->dev;
+
+		KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
+
+		tmp = find_region_enclosing_range_rbtree(rbtree, gpu_pfn,
+				nr_pages);
+		if (!tmp) {
+			dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
+			err = -ENOMEM;
+			goto exit;
+		}
+		if (!(tmp->flags & KBASE_REG_FREE)) {
+			dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n",
+					tmp->start_pfn, tmp->flags,
+					tmp->nr_pages, gpu_pfn, nr_pages);
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn,
+				nr_pages);
+		if (err) {
+			dev_warn(dev, "Failed to insert va region");
+			err = -ENOMEM;
+		}
+	} else {
+		/* Path 2: Map any free address which meets the requirements. */
+		u64 start_pfn;
+		size_t align_offset = align;
+		size_t align_mask = align - 1;
+
+		if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) {
+			WARN(align > 1, "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory",
+					__func__,
+					(unsigned long)align);
+			align_mask  = reg->extent - 1;
+			align_offset = reg->extent - reg->initial_commit;
+		}
+
+		tmp = kbase_region_tracker_find_region_meeting_reqs(reg,
+				nr_pages, align_offset, align_mask,
+				&start_pfn);
+		if (tmp) {
+			err = kbase_insert_va_region_nolock(reg, tmp,
+							start_pfn, nr_pages);
+		} else {
+			err = -ENOMEM;
+		}
+	}
+
+exit:
+	return err;
+}
+
+/**
+ * @brief Initialize the internal region tracker data structure.
+ */
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
+		struct kbase_va_region *same_va_reg,
+		struct kbase_va_region *custom_va_reg)
+{
+	kctx->reg_rbtree_same = RB_ROOT;
+	kbase_region_tracker_insert(same_va_reg);
+
+	/* Although custom_va_reg doesn't always exist,
+	 * initialize unconditionally because of the mem_view debugfs
+	 * implementation which relies on this being empty.
+	 */
+	kctx->reg_rbtree_custom = RB_ROOT;
+
+	if (custom_va_reg)
+		kbase_region_tracker_insert(custom_va_reg);
+}
+
+static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	do {
+		rbnode = rb_first(rbtree);
+		if (rbnode) {
+			rb_erase(rbnode, rbtree);
+			reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+			kbase_free_alloced_region(reg);
+		}
+	} while (rbnode);
+}
+
+void kbase_region_tracker_term(struct kbase_context *kctx)
+{
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
+}
+
+void kbase_region_tracker_term_rbtree(struct rb_root *rbtree)
+{
+	kbase_region_tracker_erase_rbtree(rbtree);
+}
+
+static size_t kbase_get_same_va_bits(struct kbase_context *kctx)
+{
+	return min(kbase_get_num_cpu_va_bits(kctx),
+			(size_t) kctx->kbdev->gpu_props.mmu.va_bits);
+}
+
+/**
+ * Initialize the region tracker data structure.
+ */
+int kbase_region_tracker_init(struct kbase_context *kctx)
+{
+	struct kbase_va_region *same_va_reg;
+	struct kbase_va_region *custom_va_reg = NULL;
+	size_t same_va_bits = kbase_get_same_va_bits(kctx);
+	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
+	u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+	u64 same_va_pages;
+	int err;
+
+	/* Take the lock as kbase_free_alloced_region requires it */
+	kbase_gpu_vm_lock(kctx);
+
+	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+	/* all have SAME_VA */
+	same_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 1,
+			same_va_pages,
+			KBASE_REG_ZONE_SAME_VA);
+
+	if (!same_va_reg) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+#ifdef CONFIG_64BIT
+	/* 32-bit clients have custom VA zones */
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+#endif
+		if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
+			err = -EINVAL;
+			goto fail_free_same_va;
+		}
+		/* If the current size of TMEM is out of range of the
+		 * virtual address space addressable by the MMU then
+		 * we should shrink it to fit
+		 */
+		if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
+			custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
+
+		custom_va_reg = kbase_alloc_free_region(
+				&kctx->reg_rbtree_custom,
+				KBASE_REG_ZONE_CUSTOM_VA_BASE,
+				custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+
+		if (!custom_va_reg) {
+			err = -ENOMEM;
+			goto fail_free_same_va;
+		}
+#ifdef CONFIG_64BIT
+	}
+#endif
+
+	kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg);
+
+	kctx->same_va_end = same_va_pages + 1;
+
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_free_same_va:
+	kbase_free_alloced_region(same_va_reg);
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+#ifdef CONFIG_64BIT
+static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
+		u64 jit_va_pages)
+{
+	struct kbase_va_region *same_va;
+	struct kbase_va_region *custom_va_reg;
+	u64 same_va_bits = kbase_get_same_va_bits(kctx);
+	u64 total_va_size;
+	int err;
+
+	total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/*
+	 * Modify the same VA free region after creation. Be careful to ensure
+	 * that allocations haven't been made as they could cause an overlap
+	 * to happen with existing same VA allocations and the custom VA zone.
+	 */
+	same_va = kbase_region_tracker_find_region_base_address(kctx,
+			PAGE_SIZE);
+	if (!same_va) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* The region flag or region size has changed since creation so bail. */
+	if ((!(same_va->flags & KBASE_REG_FREE)) ||
+			(same_va->nr_pages != total_va_size)) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	if (same_va->nr_pages < jit_va_pages ||
+			kctx->same_va_end < jit_va_pages) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* It's safe to adjust the same VA zone now */
+	same_va->nr_pages -= jit_va_pages;
+	kctx->same_va_end -= jit_va_pages;
+
+	/*
+	 * Create a custom VA zone at the end of the VA for allocations which
+	 * JIT can use so it doesn't have to allocate VA from the kernel.
+	 */
+	custom_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
+				kctx->same_va_end,
+				jit_va_pages,
+				KBASE_REG_ZONE_CUSTOM_VA);
+
+	if (!custom_va_reg) {
+		/*
+		 * The context will be destroyed if we fail here so no point
+		 * reverting the change we made to same_va.
+		 */
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	kbase_region_tracker_insert(custom_va_reg);
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+#endif
+
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
+		u8 max_allocations, u8 trim_level)
+{
+	if (trim_level > 100)
+		return -EINVAL;
+
+	kctx->jit_max_allocations = max_allocations;
+	kctx->trim_level = trim_level;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
+#endif
+	/*
+	 * Nothing to do for 32-bit clients, JIT uses the existing
+	 * custom VA zone.
+	 */
+	return 0;
+}
+
+
+int kbase_mem_init(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+	kbdev->mem_pool_max_size_default = KBASE_MEM_POOL_MAX_SIZE_KCTX;
+
+	/* Initialize memory usage */
+	atomic_set(&memdev->used_pages, 0);
+
+	ret = kbase_mem_pool_init(&kbdev->mem_pool,
+			KBASE_MEM_POOL_MAX_SIZE_KBDEV,
+			KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
+			kbdev,
+			NULL);
+	if (ret)
+		return ret;
+
+	ret = kbase_mem_pool_init(&kbdev->lp_mem_pool,
+			(KBASE_MEM_POOL_MAX_SIZE_KBDEV >> 9),
+			KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER,
+			kbdev,
+			NULL);
+	if (ret)
+		kbase_mem_pool_term(&kbdev->mem_pool);
+
+	return ret;
+}
+
+void kbase_mem_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_mem_term(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+	int pages;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+
+	pages = atomic_read(&memdev->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_pool_term(&kbdev->mem_pool);
+	kbase_mem_pool_term(&kbdev->lp_mem_pool);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_term);
+
+
+
+
+/**
+ * @brief Allocate a free region object.
+ *
+ * The allocated object is not part of any list yet, and is flagged as
+ * KBASE_REG_FREE. No mapping is allocated yet.
+ *
+ * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA.
+ *
+ */
+struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
+		u64 start_pfn, size_t nr_pages, int zone)
+{
+	struct kbase_va_region *new_reg;
+
+	KBASE_DEBUG_ASSERT(rbtree != NULL);
+
+	/* zone argument should only contain zone related region flags */
+	KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
+
+	new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+
+	if (!new_reg)
+		return NULL;
+
+	new_reg->cpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->gpu_alloc = NULL; /* no alloc bound yet */
+	new_reg->rbtree = rbtree;
+	new_reg->flags = zone | KBASE_REG_FREE;
+
+	new_reg->flags |= KBASE_REG_GROWABLE;
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	INIT_LIST_HEAD(&new_reg->jit_node);
+
+	return new_reg;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
+
+static struct kbase_context *kbase_reg_flags_to_kctx(
+		struct kbase_va_region *reg)
+{
+	struct kbase_context *kctx = NULL;
+	struct rb_root *rbtree = reg->rbtree;
+
+	switch (reg->flags & KBASE_REG_ZONE_MASK) {
+	case KBASE_REG_ZONE_CUSTOM_VA:
+		kctx = container_of(rbtree, struct kbase_context,
+				reg_rbtree_custom);
+		break;
+	case KBASE_REG_ZONE_SAME_VA:
+		kctx = container_of(rbtree, struct kbase_context,
+				reg_rbtree_same);
+		break;
+	default:
+		WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags);
+		break;
+	}
+
+	return kctx;
+}
+
+/**
+ * @brief Free a region object.
+ *
+ * The described region must be freed of any mapping.
+ *
+ * If the region is not flagged as KBASE_REG_FREE, the region's
+ * alloc object will be released.
+ * It is a bug if no alloc object exists for non-free regions.
+ *
+ */
+void kbase_free_alloced_region(struct kbase_va_region *reg)
+{
+	if (!(reg->flags & KBASE_REG_FREE)) {
+		struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg);
+
+		if (WARN_ON(!kctx))
+			return;
+
+
+		mutex_lock(&kctx->jit_evict_lock);
+
+		/*
+		 * The physical allocation should have been removed from the
+		 * eviction list before this function is called. However, in the
+		 * case of abnormal process termination or the app leaking the
+		 * memory kbase_mem_free_region is not called so it can still be
+		 * on the list at termination time of the region tracker.
+		 */
+		if (!list_empty(&reg->gpu_alloc->evict_node)) {
+			mutex_unlock(&kctx->jit_evict_lock);
+
+			/*
+			 * Unlink the physical allocation before unmaking it
+			 * evictable so that the allocation isn't grown back to
+			 * its last backed size as we're going to unmap it
+			 * anyway.
+			 */
+			reg->cpu_alloc->reg = NULL;
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				reg->gpu_alloc->reg = NULL;
+
+			/*
+			 * If a region has been made evictable then we must
+			 * unmake it before trying to free it.
+			 * If the memory hasn't been reclaimed it will be
+			 * unmapped and freed below, if it has been reclaimed
+			 * then the operations below are no-ops.
+			 */
+			if (reg->flags & KBASE_REG_DONT_NEED) {
+				KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+						   KBASE_MEM_TYPE_NATIVE);
+				kbase_mem_evictable_unmake(reg->gpu_alloc);
+			}
+		} else {
+			mutex_unlock(&kctx->jit_evict_lock);
+		}
+
+		/*
+		 * Remove the region from the sticky resource metadata
+		 * list should it be there.
+		 */
+		kbase_sticky_resource_release(kctx, NULL,
+				reg->start_pfn << PAGE_SHIFT);
+
+		kbase_mem_phy_alloc_put(reg->cpu_alloc);
+		kbase_mem_phy_alloc_put(reg->gpu_alloc);
+		/* To detect use-after-free in debug builds */
+		KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE);
+	}
+	kfree(reg);
+}
+
+KBASE_EXPORT_TEST_API(kbase_free_alloced_region);
+
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align)
+{
+	int err;
+	size_t i = 0;
+	unsigned long attr;
+	unsigned long mask = ~KBASE_REG_MEMATTR_MASK;
+	unsigned long gwt_mask = ~0;
+
+#ifdef CONFIG_MALI_JOB_DUMP
+	if (kctx->gwt_enabled)
+		gwt_mask = ~KBASE_REG_GPU_WR;
+#endif
+
+	if ((kctx->kbdev->system_coherency == COHERENCY_ACE) &&
+		(reg->flags & KBASE_REG_SHARE_BOTH))
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA);
+	else
+		attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC);
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	err = kbase_add_va_region(kctx, reg, addr, nr_pages, align);
+	if (err)
+		return err;
+
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+		struct kbase_mem_phy_alloc *alloc;
+
+		alloc = reg->gpu_alloc;
+		stride = alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
+		for (i = 0; i < alloc->imported.alias.nents; i++) {
+			if (alloc->imported.alias.aliased[i].alloc) {
+				err = kbase_mmu_insert_pages(kctx->kbdev,
+						&kctx->mmu,
+						reg->start_pfn + (i * stride),
+						alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
+						alloc->imported.alias.aliased[i].length,
+						reg->flags & gwt_mask,
+						kctx->as_nr);
+				if (err)
+					goto bad_insert;
+
+				kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc);
+			} else {
+				err = kbase_mmu_insert_single_page(kctx,
+					reg->start_pfn + i * stride,
+					kctx->aliasing_sink_page,
+					alloc->imported.alias.aliased[i].length,
+					(reg->flags & mask & gwt_mask) | attr);
+
+				if (err)
+					goto bad_insert;
+			}
+		}
+	} else {
+		err = kbase_mmu_insert_pages(kctx->kbdev,
+				&kctx->mmu,
+				reg->start_pfn,
+				kbase_get_gpu_phy_pages(reg),
+				kbase_reg_current_backed_size(reg),
+				reg->flags & gwt_mask,
+				kctx->as_nr);
+		if (err)
+			goto bad_insert;
+		kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc);
+	}
+
+	return err;
+
+bad_insert:
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 stride;
+
+		stride = reg->gpu_alloc->imported.alias.stride;
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		while (i--)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc) {
+				kbase_mmu_teardown_pages(kctx->kbdev,
+					&kctx->mmu,
+					reg->start_pfn + (i * stride),
+					reg->gpu_alloc->imported.alias.aliased[i].length,
+					kctx->as_nr);
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+			}
+	}
+
+	kbase_remove_va_region(reg);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_mmap);
+
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc, bool writeable);
+
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	if (reg->start_pfn == 0)
+		return 0;
+
+	if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		size_t i;
+
+		err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
+				reg->start_pfn, reg->nr_pages, kctx->as_nr);
+		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+		for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++)
+			if (reg->gpu_alloc->imported.alias.aliased[i].alloc)
+				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+	} else {
+		err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
+			reg->start_pfn, kbase_reg_current_backed_size(reg),
+			kctx->as_nr);
+		kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
+	}
+
+	if (reg->gpu_alloc && reg->gpu_alloc->type ==
+			KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+		struct kbase_alloc_import_user_buf *user_buf =
+			&reg->gpu_alloc->imported.user_buf;
+
+		if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) {
+			user_buf->current_mapping_usage_count &=
+				~PINNED_ON_IMPORT;
+
+			kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc,
+					(reg->flags & KBASE_REG_GPU_WR));
+		}
+	}
+
+	if (err)
+		return err;
+
+	err = kbase_remove_va_region(reg);
+	return err;
+}
+
+static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset)
+{
+	struct vm_area_struct *vma;
+	struct kbase_cpu_mapping *map;
+	unsigned long vm_pgoff_in_region;
+	unsigned long vm_off_in_region;
+	unsigned long map_start;
+	size_t map_size;
+
+	lockdep_assert_held(&current->mm->mmap_sem);
+
+	if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */
+		return NULL;
+
+	vma = find_vma_intersection(current->mm, uaddr, uaddr+size);
+
+	if (!vma || vma->vm_start > uaddr)
+		return NULL;
+	if (vma->vm_ops != &kbase_vm_ops)
+		/* Not ours! */
+		return NULL;
+
+	map = vma->vm_private_data;
+
+	if (map->kctx != kctx)
+		/* Not from this context! */
+		return NULL;
+
+	vm_pgoff_in_region = vma->vm_pgoff - map->region->start_pfn;
+	vm_off_in_region = vm_pgoff_in_region << PAGE_SHIFT;
+	map_start = vma->vm_start - vm_off_in_region;
+	map_size = map->region->nr_pages << PAGE_SHIFT;
+
+	if ((uaddr + size) > (map_start + map_size))
+		/* Not within the CPU mapping */
+		return NULL;
+
+	*offset = (uaddr - vma->vm_start) + vm_off_in_region;
+
+	return map;
+}
+
+int kbasep_find_enclosing_cpu_mapping_offset(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset)
+{
+	struct kbase_cpu_mapping *map;
+
+	kbase_os_mem_map_lock(kctx);
+
+	map = kbasep_find_enclosing_cpu_mapping(kctx, uaddr, size, offset);
+
+	kbase_os_mem_map_unlock(kctx);
+
+	if (!map)
+		return -EINVAL;
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset);
+
+int kbasep_find_enclosing_gpu_mapping_start_and_offset(struct kbase_context *kctx,
+		u64 gpu_addr, size_t size, u64 *start, u64 *offset)
+{
+	struct kbase_va_region *region;
+
+	kbase_gpu_vm_lock(kctx);
+
+	region = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+
+	if (!region) {
+		kbase_gpu_vm_unlock(kctx);
+		return -EINVAL;
+	}
+
+	*start = region->start_pfn << PAGE_SHIFT;
+
+	*offset = gpu_addr - *start;
+
+	if (((region->start_pfn + region->nr_pages) << PAGE_SHIFT) < (gpu_addr + size)) {
+		kbase_gpu_vm_unlock(kctx);
+		return -EINVAL;
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_gpu_mapping_start_and_offset);
+
+void kbase_sync_single(struct kbase_context *kctx,
+		struct tagged_addr t_cpu_pa, struct tagged_addr t_gpu_pa,
+		off_t offset, size_t size, enum kbase_sync_type sync_fn)
+{
+	struct page *cpu_page;
+	phys_addr_t cpu_pa = as_phys_addr_t(t_cpu_pa);
+	phys_addr_t gpu_pa = as_phys_addr_t(t_gpu_pa);
+
+	cpu_page = pfn_to_page(PFN_DOWN(cpu_pa));
+
+	if (likely(cpu_pa == gpu_pa)) {
+		dma_addr_t dma_addr;
+
+		BUG_ON(!cpu_page);
+		BUG_ON(offset + size > PAGE_SIZE);
+
+		dma_addr = kbase_dma_addr(cpu_page) + offset;
+		if (sync_fn == KBASE_SYNC_TO_CPU)
+			dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+		else if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+					size, DMA_BIDIRECTIONAL);
+	} else {
+		void *src = NULL;
+		void *dst = NULL;
+		struct page *gpu_page;
+
+		if (WARN(!gpu_pa, "No GPU PA found for infinite cache op"))
+			return;
+
+		gpu_page = pfn_to_page(PFN_DOWN(gpu_pa));
+
+		if (sync_fn == KBASE_SYNC_TO_DEVICE) {
+			src = ((unsigned char *)kmap(cpu_page)) + offset;
+			dst = ((unsigned char *)kmap(gpu_page)) + offset;
+		} else if (sync_fn == KBASE_SYNC_TO_CPU) {
+			dma_sync_single_for_cpu(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+			src = ((unsigned char *)kmap(gpu_page)) + offset;
+			dst = ((unsigned char *)kmap(cpu_page)) + offset;
+		}
+		memcpy(dst, src, size);
+		kunmap(gpu_page);
+		kunmap(cpu_page);
+		if (sync_fn == KBASE_SYNC_TO_DEVICE)
+			dma_sync_single_for_device(kctx->kbdev->dev,
+					kbase_dma_addr(gpu_page) + offset,
+					size, DMA_BIDIRECTIONAL);
+	}
+}
+
+static int kbase_do_syncset(struct kbase_context *kctx,
+		struct basep_syncset *sset, enum kbase_sync_type sync_fn)
+{
+	int err = 0;
+	struct kbase_va_region *reg;
+	struct kbase_cpu_mapping *map;
+	unsigned long start;
+	size_t size;
+	struct tagged_addr *cpu_pa;
+	struct tagged_addr *gpu_pa;
+	u64 page_off, page_count;
+	u64 i;
+	u64 offset;
+
+	kbase_os_mem_map_lock(kctx);
+	kbase_gpu_vm_lock(kctx);
+
+	/* find the region where the virtual address is contained */
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+			sset->mem_handle.basep.handle);
+	if (!reg) {
+		dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX",
+				sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED) ||
+			kbase_mem_is_imported(reg->gpu_alloc->type))
+		goto out_unlock;
+
+	start = (uintptr_t)sset->user_addr;
+	size = (size_t)sset->size;
+
+	map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset);
+	if (!map) {
+		dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX",
+				start, sset->mem_handle.basep.handle);
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	page_off = offset >> PAGE_SHIFT;
+	offset &= ~PAGE_MASK;
+	page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	cpu_pa = kbase_get_cpu_phy_pages(reg);
+	gpu_pa = kbase_get_gpu_phy_pages(reg);
+
+	if (page_off > reg->nr_pages ||
+			page_off + page_count > reg->nr_pages) {
+		/* Sync overflows the region */
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	/* Sync first page */
+	if (as_phys_addr_t(cpu_pa[page_off])) {
+		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+
+		kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off],
+				offset, sz, sync_fn);
+	}
+
+	/* Sync middle pages (if any) */
+	for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+		/* we grow upwards, so bail on first non-present page */
+		if (!as_phys_addr_t(cpu_pa[page_off + i]))
+			break;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + i],
+				gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn);
+	}
+
+	/* Sync last page (if any) */
+	if (page_count > 1 &&
+	    as_phys_addr_t(cpu_pa[page_off + page_count - 1])) {
+		size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1;
+
+		kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1],
+				gpu_pa[page_off + page_count - 1], 0, sz,
+				sync_fn);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_os_mem_map_unlock(kctx);
+	return err;
+}
+
+int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset)
+{
+	int err = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(sset != NULL);
+
+	if (sset->mem_handle.basep.handle & ~PAGE_MASK) {
+		dev_warn(kctx->kbdev->dev,
+				"mem_handle: passed parameter is invalid");
+		return -EINVAL;
+	}
+
+	switch (sset->type) {
+	case BASE_SYNCSET_OP_MSYNC:
+		err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_DEVICE);
+		break;
+
+	case BASE_SYNCSET_OP_CSYNC:
+		err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_CPU);
+		break;
+
+	default:
+		dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type);
+		break;
+	}
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_sync_now);
+
+/* vm lock must be held */
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (reg->flags & KBASE_REG_JIT) {
+		dev_warn(kctx->kbdev->dev, "Attempt to free JIT memory!\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Unlink the physical allocation before unmaking it evictable so
+	 * that the allocation isn't grown back to its last backed size
+	 * as we're going to unmap it anyway.
+	 */
+	reg->cpu_alloc->reg = NULL;
+	if (reg->cpu_alloc != reg->gpu_alloc)
+		reg->gpu_alloc->reg = NULL;
+
+	/*
+	 * If a region has been made evictable then we must unmake it
+	 * before trying to free it.
+	 * If the memory hasn't been reclaimed it will be unmapped and freed
+	 * below, if it has been reclaimed then the operations below are no-ops.
+	 */
+	if (reg->flags & KBASE_REG_DONT_NEED) {
+		KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+				   KBASE_MEM_TYPE_NATIVE);
+		kbase_mem_evictable_unmake(reg->gpu_alloc);
+	}
+
+	err = kbase_gpu_munmap(kctx, reg);
+	if (err) {
+		dev_warn(kctx->kbdev->dev, "Could not unmap from the GPU...\n");
+		goto out;
+	}
+
+	/* This will also free the physical pages */
+	kbase_free_alloced_region(reg);
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free_region);
+
+/**
+ * @brief Free the region from the GPU and unregister it.
+ *
+ * This function implements the free operation on a memory segment.
+ * It will loudly fail if called with outstanding mappings.
+ */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
+{
+	int err = 0;
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) {
+		dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid");
+		return -EINVAL;
+	}
+
+	if (0 == gpu_addr) {
+		dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n");
+		return -EINVAL;
+	}
+	kbase_gpu_vm_lock(kctx);
+
+	if (gpu_addr >= BASE_MEM_COOKIE_BASE &&
+	    gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) {
+		int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE);
+
+		reg = kctx->pending_regions[cookie];
+		if (!reg) {
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		/* ask to unlink the cookie as we'll free it */
+
+		kctx->pending_regions[cookie] = NULL;
+		kctx->cookies |= (1UL << cookie);
+
+		kbase_free_alloced_region(reg);
+	} else {
+		/* A real GPU va */
+		/* Validate the region */
+		reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+		if (!reg || (reg->flags & KBASE_REG_FREE)) {
+			dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX",
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) {
+			/* SAME_VA must be freed through munmap */
+			dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__,
+					gpu_addr);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+		err = kbase_mem_free_region(kctx, reg);
+	}
+
+ out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free);
+
+int kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0);
+
+	reg->flags |= kbase_cache_enabled(flags, reg->nr_pages);
+	/* all memory is now growable */
+	reg->flags |= KBASE_REG_GROWABLE;
+
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		reg->flags |= KBASE_REG_PF_GROW;
+
+	if (flags & BASE_MEM_PROT_CPU_WR)
+		reg->flags |= KBASE_REG_CPU_WR;
+
+	if (flags & BASE_MEM_PROT_CPU_RD)
+		reg->flags |= KBASE_REG_CPU_RD;
+
+	if (flags & BASE_MEM_PROT_GPU_WR)
+		reg->flags |= KBASE_REG_GPU_WR;
+
+	if (flags & BASE_MEM_PROT_GPU_RD)
+		reg->flags |= KBASE_REG_GPU_RD;
+
+	if (0 == (flags & BASE_MEM_PROT_GPU_EX))
+		reg->flags |= KBASE_REG_GPU_NX;
+
+	if (!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED &&
+				!(flags & BASE_MEM_UNCACHED_GPU))
+			return -EINVAL;
+	} else if (flags & (BASE_MEM_COHERENT_SYSTEM |
+			BASE_MEM_COHERENT_SYSTEM_REQUIRED)) {
+		reg->flags |= KBASE_REG_SHARE_BOTH;
+	}
+
+	if (!(reg->flags & KBASE_REG_SHARE_BOTH) &&
+			flags & BASE_MEM_COHERENT_LOCAL) {
+		reg->flags |= KBASE_REG_SHARE_IN;
+	}
+
+	if (flags & BASE_MEM_TILER_ALIGN_TOP)
+		reg->flags |= KBASE_REG_TILER_ALIGN_TOP;
+
+
+	/* Set up default MEMATTR usage */
+	if (!(reg->flags & KBASE_REG_GPU_CACHED)) {
+		if (kctx->kbdev->mmu_mode->flags &
+				KBASE_MMU_MODE_HAS_NON_CACHEABLE) {
+			/* Override shareability, and MEMATTR for uncached */
+			reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+			reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
+		} else {
+			dev_warn(kctx->kbdev->dev,
+				"Can't allocate GPU uncached memory due to MMU in Legacy Mode\n");
+			return -EINVAL;
+		}
+	} else if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
+		(reg->flags & KBASE_REG_SHARE_BOTH)) {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE);
+	} else {
+		reg->flags |=
+			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
+	}
+
+	if (flags & BASE_MEM_PERMANENT_KERNEL_MAPPING)
+		reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING;
+
+	if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE)
+		reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE;
+
+	return 0;
+}
+
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
+		size_t nr_pages_requested)
+{
+	int new_page_count __maybe_unused;
+	size_t nr_left = nr_pages_requested;
+	int res;
+	struct kbase_context *kctx;
+	struct tagged_addr *tp;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
+
+	if (alloc->reg) {
+		if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
+			goto invalid_request;
+	}
+
+	kctx = alloc->imported.native.kctx;
+
+	if (nr_pages_requested == 0)
+		goto done; /*nothing to do*/
+
+	new_page_count = kbase_atomic_add_pages(
+			nr_pages_requested, &kctx->used_pages);
+	kbase_atomic_add_pages(nr_pages_requested,
+			       &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters before we allocate pages so that this
+	 * allocation is visible to the OOM killer */
+	kbase_process_page_usage_inc(kctx, nr_pages_requested);
+
+	tp = alloc->pages + alloc->nents;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	/* Check if we have enough pages requested so we can allocate a large
+	 * page (512 * 4KB = 2MB )
+	 */
+	if (nr_left >= (SZ_2M / SZ_4K)) {
+		int nr_lp = nr_left / (SZ_2M / SZ_4K);
+
+		res = kbase_mem_pool_alloc_pages(&kctx->lp_mem_pool,
+						 nr_lp * (SZ_2M / SZ_4K),
+						 tp,
+						 true);
+
+		if (res > 0) {
+			nr_left -= res;
+			tp += res;
+		}
+
+		if (nr_left) {
+			struct kbase_sub_alloc *sa, *temp_sa;
+
+			spin_lock(&kctx->mem_partials_lock);
+
+			list_for_each_entry_safe(sa, temp_sa,
+						 &kctx->mem_partials, link) {
+				int pidx = 0;
+
+				while (nr_left) {
+					pidx = find_next_zero_bit(sa->sub_pages,
+								  SZ_2M / SZ_4K,
+								  pidx);
+					bitmap_set(sa->sub_pages, pidx, 1);
+					*tp++ = as_tagged_tag(page_to_phys(sa->page +
+									   pidx),
+							      FROM_PARTIAL);
+					nr_left--;
+
+					if (bitmap_full(sa->sub_pages, SZ_2M / SZ_4K)) {
+						/* unlink from partial list when full */
+						list_del_init(&sa->link);
+						break;
+					}
+				}
+			}
+			spin_unlock(&kctx->mem_partials_lock);
+		}
+
+		/* only if we actually have a chunk left <512. If more it indicates
+		 * that we couldn't allocate a 2MB above, so no point to retry here.
+		 */
+		if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) {
+			/* create a new partial and suballocate the rest from it */
+			struct page *np = NULL;
+
+			do {
+				int err;
+
+				np = kbase_mem_pool_alloc(&kctx->lp_mem_pool);
+				if (np)
+					break;
+				err = kbase_mem_pool_grow(&kctx->lp_mem_pool, 1);
+				if (err)
+					break;
+			} while (1);
+
+			if (np) {
+				int i;
+				struct kbase_sub_alloc *sa;
+				struct page *p;
+
+				sa = kmalloc(sizeof(*sa), GFP_KERNEL);
+				if (!sa) {
+					kbase_mem_pool_free(&kctx->lp_mem_pool, np, false);
+					goto no_new_partial;
+				}
+
+				/* store pointers back to the control struct */
+				np->lru.next = (void *)sa;
+				for (p = np; p < np + SZ_2M / SZ_4K; p++)
+					p->lru.prev = (void *)np;
+				INIT_LIST_HEAD(&sa->link);
+				bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K);
+				sa->page = np;
+
+				for (i = 0; i < nr_left; i++)
+					*tp++ = as_tagged_tag(page_to_phys(np + i), FROM_PARTIAL);
+
+				bitmap_set(sa->sub_pages, 0, nr_left);
+				nr_left = 0;
+
+				/* expose for later use */
+				spin_lock(&kctx->mem_partials_lock);
+				list_add(&sa->link, &kctx->mem_partials);
+				spin_unlock(&kctx->mem_partials_lock);
+			}
+		}
+	}
+no_new_partial:
+#endif
+
+	if (nr_left) {
+		res = kbase_mem_pool_alloc_pages(&kctx->mem_pool,
+						 nr_left,
+						 tp,
+						 false);
+		if (res <= 0)
+			goto alloc_failed;
+	}
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+
+	alloc->nents += nr_pages_requested;
+done:
+	return 0;
+
+alloc_failed:
+	/* rollback needed if got one or more 2MB but failed later */
+	if (nr_left != nr_pages_requested) {
+		size_t nr_pages_to_free = nr_pages_requested - nr_left;
+
+		alloc->nents += nr_pages_to_free;
+
+		kbase_process_page_usage_inc(kctx, nr_pages_to_free);
+		kbase_atomic_add_pages(nr_pages_to_free, &kctx->used_pages);
+		kbase_atomic_add_pages(nr_pages_to_free,
+			       &kctx->kbdev->memdev.used_pages);
+
+		kbase_free_phy_pages_helper(alloc, nr_pages_to_free);
+	}
+
+	kbase_process_page_usage_dec(kctx, nr_pages_requested);
+	kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages);
+	kbase_atomic_sub_pages(nr_pages_requested,
+			       &kctx->kbdev->memdev.used_pages);
+
+invalid_request:
+	return -ENOMEM;
+}
+
+struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
+		struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool,
+		size_t nr_pages_requested,
+		struct kbase_sub_alloc **prealloc_sa)
+{
+	int new_page_count __maybe_unused;
+	size_t nr_left = nr_pages_requested;
+	int res;
+	struct kbase_context *kctx;
+	struct tagged_addr *tp;
+	struct tagged_addr *new_pages = NULL;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
+
+	lockdep_assert_held(&pool->pool_lock);
+
+#if !defined(CONFIG_MALI_2MB_ALLOC)
+	WARN_ON(pool->order);
+#endif
+
+	if (alloc->reg) {
+		if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
+			goto invalid_request;
+	}
+
+	kctx = alloc->imported.native.kctx;
+
+	lockdep_assert_held(&kctx->mem_partials_lock);
+
+	if (nr_pages_requested == 0)
+		goto done; /*nothing to do*/
+
+	new_page_count = kbase_atomic_add_pages(
+			nr_pages_requested, &kctx->used_pages);
+	kbase_atomic_add_pages(nr_pages_requested,
+			       &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters before we allocate pages so that this
+	 * allocation is visible to the OOM killer
+	 */
+	kbase_process_page_usage_inc(kctx, nr_pages_requested);
+
+	tp = alloc->pages + alloc->nents;
+	new_pages = tp;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	if (pool->order) {
+		int nr_lp = nr_left / (SZ_2M / SZ_4K);
+
+		res = kbase_mem_pool_alloc_pages_locked(pool,
+						 nr_lp * (SZ_2M / SZ_4K),
+						 tp);
+
+		if (res > 0) {
+			nr_left -= res;
+			tp += res;
+		}
+
+		if (nr_left) {
+			struct kbase_sub_alloc *sa, *temp_sa;
+
+			list_for_each_entry_safe(sa, temp_sa,
+						 &kctx->mem_partials, link) {
+				int pidx = 0;
+
+				while (nr_left) {
+					pidx = find_next_zero_bit(sa->sub_pages,
+								  SZ_2M / SZ_4K,
+								  pidx);
+					bitmap_set(sa->sub_pages, pidx, 1);
+					*tp++ = as_tagged_tag(page_to_phys(
+							sa->page + pidx),
+							FROM_PARTIAL);
+					nr_left--;
+
+					if (bitmap_full(sa->sub_pages,
+							SZ_2M / SZ_4K)) {
+						/* unlink from partial list when
+						 * full
+						 */
+						list_del_init(&sa->link);
+						break;
+					}
+				}
+			}
+		}
+
+		/* only if we actually have a chunk left <512. If more it
+		 * indicates that we couldn't allocate a 2MB above, so no point
+		 * to retry here.
+		 */
+		if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) {
+			/* create a new partial and suballocate the rest from it
+			 */
+			struct page *np = NULL;
+
+			np = kbase_mem_pool_alloc_locked(pool);
+
+			if (np) {
+				int i;
+				struct kbase_sub_alloc *const sa = *prealloc_sa;
+				struct page *p;
+
+				/* store pointers back to the control struct */
+				np->lru.next = (void *)sa;
+				for (p = np; p < np + SZ_2M / SZ_4K; p++)
+					p->lru.prev = (void *)np;
+				INIT_LIST_HEAD(&sa->link);
+				bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K);
+				sa->page = np;
+
+				for (i = 0; i < nr_left; i++)
+					*tp++ = as_tagged_tag(
+							page_to_phys(np + i),
+							FROM_PARTIAL);
+
+				bitmap_set(sa->sub_pages, 0, nr_left);
+				nr_left = 0;
+				/* Indicate to user that we'll free this memory
+				 * later.
+				 */
+				*prealloc_sa = NULL;
+
+				/* expose for later use */
+				list_add(&sa->link, &kctx->mem_partials);
+			}
+		}
+		if (nr_left)
+			goto alloc_failed;
+	} else {
+#endif
+		res = kbase_mem_pool_alloc_pages_locked(pool,
+						 nr_left,
+						 tp);
+		if (res <= 0)
+			goto alloc_failed;
+#ifdef CONFIG_MALI_2MB_ALLOC
+	}
+#endif
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+
+	alloc->nents += nr_pages_requested;
+done:
+	return new_pages;
+
+alloc_failed:
+	/* rollback needed if got one or more 2MB but failed later */
+	if (nr_left != nr_pages_requested) {
+		size_t nr_pages_to_free = nr_pages_requested - nr_left;
+
+		struct tagged_addr *start_free = alloc->pages + alloc->nents;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+		if (pool->order) {
+			while (nr_pages_to_free) {
+				if (is_huge_head(*start_free)) {
+					kbase_mem_pool_free_pages_locked(
+						pool, 512,
+						start_free,
+						false, /* not dirty */
+						true); /* return to pool */
+					nr_pages_to_free -= 512;
+					start_free += 512;
+				} else if (is_partial(*start_free)) {
+					free_partial_locked(kctx, pool,
+							*start_free);
+					nr_pages_to_free--;
+					start_free++;
+				}
+			}
+		} else {
+#endif
+			kbase_mem_pool_free_pages_locked(pool,
+					nr_pages_to_free,
+					start_free,
+					false, /* not dirty */
+					true); /* return to pool */
+#ifdef CONFIG_MALI_2MB_ALLOC
+		}
+#endif
+	}
+
+	kbase_process_page_usage_dec(kctx, nr_pages_requested);
+	kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages);
+	kbase_atomic_sub_pages(nr_pages_requested,
+			       &kctx->kbdev->memdev.used_pages);
+
+invalid_request:
+	return NULL;
+}
+
+static void free_partial(struct kbase_context *kctx, struct tagged_addr tp)
+{
+	struct page *p, *head_page;
+	struct kbase_sub_alloc *sa;
+
+	p = as_page(tp);
+	head_page = (struct page *)p->lru.prev;
+	sa = (struct kbase_sub_alloc *)head_page->lru.next;
+	spin_lock(&kctx->mem_partials_lock);
+	clear_bit(p - head_page, sa->sub_pages);
+	if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
+		list_del(&sa->link);
+		kbase_mem_pool_free(&kctx->lp_mem_pool, head_page, true);
+		kfree(sa);
+	} else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) ==
+		   SZ_2M / SZ_4K - 1) {
+		/* expose the partial again */
+		list_add(&sa->link, &kctx->mem_partials);
+	}
+	spin_unlock(&kctx->mem_partials_lock);
+}
+
+int kbase_free_phy_pages_helper(
+	struct kbase_mem_phy_alloc *alloc,
+	size_t nr_pages_to_free)
+{
+	struct kbase_context *kctx = alloc->imported.native.kctx;
+	bool syncback;
+	bool reclaimed = (alloc->evicted != 0);
+	struct tagged_addr *start_free;
+	int new_page_count __maybe_unused;
+	size_t freed = 0;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
+	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+	/* early out if nothing to do */
+	if (0 == nr_pages_to_free)
+		return 0;
+
+	start_free = alloc->pages + alloc->nents - nr_pages_to_free;
+
+	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	/* pad start_free to a valid start location */
+	while (nr_pages_to_free && is_huge(*start_free) &&
+	       !is_huge_head(*start_free)) {
+		nr_pages_to_free--;
+		start_free++;
+	}
+
+	while (nr_pages_to_free) {
+		if (is_huge_head(*start_free)) {
+			/* This is a 2MB entry, so free all the 512 pages that
+			 * it points to
+			 */
+			kbase_mem_pool_free_pages(&kctx->lp_mem_pool,
+					512,
+					start_free,
+					syncback,
+					reclaimed);
+			nr_pages_to_free -= 512;
+			start_free += 512;
+			freed += 512;
+		} else if (is_partial(*start_free)) {
+			free_partial(kctx, *start_free);
+			nr_pages_to_free--;
+			start_free++;
+			freed++;
+		} else {
+			struct tagged_addr *local_end_free;
+
+			local_end_free = start_free;
+			while (nr_pages_to_free &&
+			       !is_huge(*local_end_free) &&
+			       !is_partial(*local_end_free)) {
+				local_end_free++;
+				nr_pages_to_free--;
+			}
+			kbase_mem_pool_free_pages(&kctx->mem_pool,
+					local_end_free - start_free,
+					start_free,
+					syncback,
+					reclaimed);
+			freed += local_end_free - start_free;
+			start_free += local_end_free - start_free;
+		}
+	}
+
+	alloc->nents -= freed;
+
+	/*
+	 * If the allocation was not evicted (i.e. evicted == 0) then
+	 * the page accounting needs to be done.
+	 */
+	if (!reclaimed) {
+		kbase_process_page_usage_dec(kctx, freed);
+		new_page_count = kbase_atomic_sub_pages(freed,
+							&kctx->used_pages);
+		kbase_atomic_sub_pages(freed,
+				       &kctx->kbdev->memdev.used_pages);
+
+		KBASE_TLSTREAM_AUX_PAGESALLOC(
+				kctx->id,
+				(u64)new_page_count);
+	}
+
+	return 0;
+}
+
+static void free_partial_locked(struct kbase_context *kctx,
+		struct kbase_mem_pool *pool, struct tagged_addr tp)
+{
+	struct page *p, *head_page;
+	struct kbase_sub_alloc *sa;
+
+	lockdep_assert_held(&pool->pool_lock);
+	lockdep_assert_held(&kctx->mem_partials_lock);
+
+	p = as_page(tp);
+	head_page = (struct page *)p->lru.prev;
+	sa = (struct kbase_sub_alloc *)head_page->lru.next;
+	clear_bit(p - head_page, sa->sub_pages);
+	if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
+		list_del(&sa->link);
+		kbase_mem_pool_free_locked(pool, head_page, true);
+		kfree(sa);
+	} else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) ==
+		   SZ_2M / SZ_4K - 1) {
+		/* expose the partial again */
+		list_add(&sa->link, &kctx->mem_partials);
+	}
+}
+
+void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
+		struct kbase_mem_pool *pool, struct tagged_addr *pages,
+		size_t nr_pages_to_free)
+{
+	struct kbase_context *kctx = alloc->imported.native.kctx;
+	bool syncback;
+	bool reclaimed = (alloc->evicted != 0);
+	struct tagged_addr *start_free;
+	size_t freed = 0;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+	KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
+	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+	lockdep_assert_held(&pool->pool_lock);
+	lockdep_assert_held(&kctx->mem_partials_lock);
+
+	/* early out if nothing to do */
+	if (!nr_pages_to_free)
+		return;
+
+	start_free = pages;
+
+	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	/* pad start_free to a valid start location */
+	while (nr_pages_to_free && is_huge(*start_free) &&
+	       !is_huge_head(*start_free)) {
+		nr_pages_to_free--;
+		start_free++;
+	}
+
+	while (nr_pages_to_free) {
+		if (is_huge_head(*start_free)) {
+			/* This is a 2MB entry, so free all the 512 pages that
+			 * it points to
+			 */
+			WARN_ON(!pool->order);
+			kbase_mem_pool_free_pages_locked(pool,
+					512,
+					start_free,
+					syncback,
+					reclaimed);
+			nr_pages_to_free -= 512;
+			start_free += 512;
+			freed += 512;
+		} else if (is_partial(*start_free)) {
+			WARN_ON(!pool->order);
+			free_partial_locked(kctx, pool, *start_free);
+			nr_pages_to_free--;
+			start_free++;
+			freed++;
+		} else {
+			struct tagged_addr *local_end_free;
+
+			WARN_ON(pool->order);
+			local_end_free = start_free;
+			while (nr_pages_to_free &&
+			       !is_huge(*local_end_free) &&
+			       !is_partial(*local_end_free)) {
+				local_end_free++;
+				nr_pages_to_free--;
+			}
+			kbase_mem_pool_free_pages_locked(pool,
+					local_end_free - start_free,
+					start_free,
+					syncback,
+					reclaimed);
+			freed += local_end_free - start_free;
+			start_free += local_end_free - start_free;
+		}
+	}
+
+	alloc->nents -= freed;
+
+	/*
+	 * If the allocation was not evicted (i.e. evicted == 0) then
+	 * the page accounting needs to be done.
+	 */
+	if (!reclaimed) {
+		int new_page_count;
+
+		kbase_process_page_usage_dec(kctx, freed);
+		new_page_count = kbase_atomic_sub_pages(freed,
+							&kctx->used_pages);
+		kbase_atomic_sub_pages(freed,
+				       &kctx->kbdev->memdev.used_pages);
+
+		KBASE_TLSTREAM_AUX_PAGESALLOC(
+				kctx->id,
+				(u64)new_page_count);
+	}
+}
+
+void kbase_mem_kref_free(struct kref *kref)
+{
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = container_of(kref, struct kbase_mem_phy_alloc, kref);
+
+	switch (alloc->type) {
+	case KBASE_MEM_TYPE_NATIVE: {
+
+		if (!WARN_ON(!alloc->imported.native.kctx)) {
+			if (alloc->permanent_map)
+				kbase_phy_alloc_mapping_term(
+						alloc->imported.native.kctx,
+						alloc);
+
+			/*
+			 * The physical allocation must have been removed from
+			 * the eviction list before trying to free it.
+			 */
+			mutex_lock(
+				&alloc->imported.native.kctx->jit_evict_lock);
+			WARN_ON(!list_empty(&alloc->evict_node));
+			mutex_unlock(
+				&alloc->imported.native.kctx->jit_evict_lock);
+
+			kbase_process_page_usage_dec(
+					alloc->imported.native.kctx,
+					alloc->imported.native.nr_struct_pages);
+		}
+		kbase_free_phy_pages_helper(alloc, alloc->nents);
+		break;
+	}
+	case KBASE_MEM_TYPE_ALIAS: {
+		/* just call put on the underlying phy allocs */
+		size_t i;
+		struct kbase_aliased *aliased;
+
+		aliased = alloc->imported.alias.aliased;
+		if (aliased) {
+			for (i = 0; i < alloc->imported.alias.nents; i++)
+				if (aliased[i].alloc)
+					kbase_mem_phy_alloc_put(aliased[i].alloc);
+			vfree(aliased);
+		}
+		break;
+	}
+	case KBASE_MEM_TYPE_RAW:
+		/* raw pages, external cleanup */
+		break;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		dma_buf_detach(alloc->imported.umm.dma_buf,
+			       alloc->imported.umm.dma_attachment);
+		dma_buf_put(alloc->imported.umm.dma_buf);
+		break;
+#endif
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		if (alloc->imported.user_buf.mm)
+			mmdrop(alloc->imported.user_buf.mm);
+		kfree(alloc->imported.user_buf.pages);
+		break;
+	default:
+		WARN(1, "Unexecpted free of type %d\n", alloc->type);
+		break;
+	}
+
+	/* Free based on allocation type */
+	if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+		vfree(alloc);
+	else
+		kfree(alloc);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_kref_free);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size)
+{
+	KBASE_DEBUG_ASSERT(NULL != reg);
+	KBASE_DEBUG_ASSERT(vsize > 0);
+
+	/* validate user provided arguments */
+	if (size > vsize || vsize > reg->nr_pages)
+		goto out_term;
+
+	/* Prevent vsize*sizeof from wrapping around.
+	 * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail.
+	 */
+	if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages)))
+		goto out_term;
+
+	KBASE_DEBUG_ASSERT(0 != vsize);
+
+	if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0)
+		goto out_term;
+
+	reg->cpu_alloc->reg = reg;
+	if (reg->cpu_alloc != reg->gpu_alloc) {
+		if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0)
+			goto out_rollback;
+		reg->gpu_alloc->reg = reg;
+	}
+
+	return 0;
+
+out_rollback:
+	kbase_free_phy_pages_helper(reg->cpu_alloc, size);
+out_term:
+	return -1;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
+
+bool kbase_check_alloc_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Either the GPU or CPU must be reading from the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0)
+		return false;
+
+	/* Either the GPU or CPU must be writing to the allocated memory */
+	if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* GPU executable memory cannot:
+	 * - Be written by the GPU
+	 * - Be grown on GPU page fault
+	 * - Have the top of its initial commit aligned to 'extent' */
+	if ((flags & BASE_MEM_PROT_GPU_EX) && (flags &
+			(BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
+			BASE_MEM_TILER_ALIGN_TOP)))
+		return false;
+
+	/* To have an allocation lie within a 4GB chunk is required only for
+	 * TLS memory, which will never be used to contain executable code
+	 * and also used for Tiler heap.
+	 */
+	if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags &
+			(BASE_MEM_PROT_GPU_EX | BASE_MEM_TILER_ALIGN_TOP)))
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for allocating. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* BASE_MEM_IMPORT_SHARED is only valid for imported memory */
+	if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED)
+		return false;
+
+	/* Should not combine BASE_MEM_COHERENT_LOCAL with
+	 * BASE_MEM_COHERENT_SYSTEM */
+	if ((flags & (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) ==
+			(BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM))
+		return false;
+
+	return true;
+}
+
+bool kbase_check_import_flags(unsigned long flags)
+{
+	/* Only known input flags should be set. */
+	if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+		return false;
+
+	/* At least one flag should be set */
+	if (flags == 0)
+		return false;
+
+	/* Imported memory cannot be GPU executable */
+	if (flags & BASE_MEM_PROT_GPU_EX)
+		return false;
+
+	/* Imported memory cannot grow on page fault */
+	if (flags & BASE_MEM_GROW_ON_GPF)
+		return false;
+
+	/* Imported memory cannot be aligned to the end of its initial commit */
+	if (flags & BASE_MEM_TILER_ALIGN_TOP)
+		return false;
+
+	/* GPU should have at least read or write access otherwise there is no
+	   reason for importing. */
+	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+		return false;
+
+	/* Secure memory cannot be read by the CPU */
+	if ((flags & BASE_MEM_SECURE) && (flags & BASE_MEM_PROT_CPU_RD))
+		return false;
+
+	return true;
+}
+
+int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
+		u64 va_pages, u64 commit_pages, u64 large_extent)
+{
+	struct device *dev = kctx->kbdev->dev;
+	int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+	u64 gpu_pc_pages_max = 1ULL << gpu_pc_bits >> PAGE_SHIFT;
+	struct kbase_va_region test_reg;
+
+	/* kbase_va_region's extent member can be of variable size, so check against that type */
+	test_reg.extent = large_extent;
+
+#define KBASE_MSG_PRE "GPU allocation attempted with "
+
+	if (0 == va_pages) {
+		dev_warn(dev, KBASE_MSG_PRE "0 va_pages!");
+		return -EINVAL;
+	}
+
+	if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) {
+		dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!",
+				(unsigned long long)va_pages);
+		return -ENOMEM;
+	}
+
+	/* Note: commit_pages is checked against va_pages during
+	 * kbase_alloc_phy_pages() */
+
+	/* Limit GPU executable allocs to GPU PC size */
+	if ((flags & BASE_MEM_PROT_GPU_EX) && (va_pages > gpu_pc_pages_max)) {
+		dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_PROT_GPU_EX and va_pages==%lld larger than GPU PC range %lld",
+				(unsigned long long)va_pages,
+				(unsigned long long)gpu_pc_pages_max);
+
+		return -EINVAL;
+	}
+
+	if ((flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) &&
+			test_reg.extent == 0) {
+		dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF or BASE_MEM_TILER_ALIGN_TOP but extent == 0\n");
+		return -EINVAL;
+	}
+
+	if (!(flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) &&
+			test_reg.extent != 0) {
+		dev_warn(dev, KBASE_MSG_PRE "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extent != 0\n");
+		return -EINVAL;
+	}
+
+	/* BASE_MEM_TILER_ALIGN_TOP memory has a number of restrictions */
+	if (flags & BASE_MEM_TILER_ALIGN_TOP) {
+#define KBASE_MSG_PRE_FLAG KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP and "
+		unsigned long small_extent;
+
+		if (large_extent > BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES) {
+			dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%lld pages exceeds limit %lld",
+					(unsigned long long)large_extent,
+					BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES);
+			return -EINVAL;
+		}
+		/* For use with is_power_of_2, which takes unsigned long, so
+		 * must ensure e.g. on 32-bit kernel it'll fit in that type */
+		small_extent = (unsigned long)large_extent;
+
+		if (!is_power_of_2(small_extent)) {
+			dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%ld not a non-zero power of 2",
+					small_extent);
+			return -EINVAL;
+		}
+
+		if (commit_pages > large_extent) {
+			dev_warn(dev, KBASE_MSG_PRE_FLAG "commit_pages==%ld exceeds extent==%ld",
+					(unsigned long)commit_pages,
+					(unsigned long)large_extent);
+			return -EINVAL;
+		}
+#undef KBASE_MSG_PRE_FLAG
+	}
+
+	if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) &&
+	    (va_pages > (BASE_MEM_PFN_MASK_4GB + 1))) {
+		dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GPU_VA_SAME_4GB_PAGE and va_pages==%lld greater than that needed for 4GB space",
+				(unsigned long long)va_pages);
+		return -EINVAL;
+	}
+
+	return 0;
+#undef KBASE_MSG_PRE
+}
+
+/**
+ * @brief Acquire the per-context region list lock
+ */
+void kbase_gpu_vm_lock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_lock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
+
+/**
+ * @brief Release the per-context region list lock
+ */
+void kbase_gpu_vm_unlock(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	mutex_unlock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
+
+#ifdef CONFIG_DEBUG_FS
+struct kbase_jit_debugfs_data {
+	int (*func)(struct kbase_jit_debugfs_data *);
+	struct mutex lock;
+	struct kbase_context *kctx;
+	u64 active_value;
+	u64 pool_value;
+	u64 destroy_value;
+	char buffer[50];
+};
+
+static int kbase_jit_debugfs_common_open(struct inode *inode,
+		struct file *file, int (*func)(struct kbase_jit_debugfs_data *))
+{
+	struct kbase_jit_debugfs_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->func = func;
+	mutex_init(&data->lock);
+	data->kctx = (struct kbase_context *) inode->i_private;
+
+	file->private_data = data;
+
+	return nonseekable_open(inode, file);
+}
+
+static ssize_t kbase_jit_debugfs_common_read(struct file *file,
+		char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_jit_debugfs_data *data;
+	size_t size;
+	int ret;
+
+	data = (struct kbase_jit_debugfs_data *) file->private_data;
+	mutex_lock(&data->lock);
+
+	if (*ppos) {
+		size = strnlen(data->buffer, sizeof(data->buffer));
+	} else {
+		if (!data->func) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		if (data->func(data)) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		size = scnprintf(data->buffer, sizeof(data->buffer),
+				"%llu,%llu,%llu", data->active_value,
+				data->pool_value, data->destroy_value);
+	}
+
+	ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size);
+
+out_unlock:
+	mutex_unlock(&data->lock);
+	return ret;
+}
+
+static int kbase_jit_debugfs_common_release(struct inode *inode,
+		struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \
+static int __fops ## _open(struct inode *inode, struct file *file) \
+{ \
+	return kbase_jit_debugfs_common_open(inode, file, __func); \
+} \
+static const struct file_operations __fops = { \
+	.owner = THIS_MODULE, \
+	.open = __fops ## _open, \
+	.release = kbase_jit_debugfs_common_release, \
+	.read = kbase_jit_debugfs_common_read, \
+	.write = NULL, \
+	.llseek = generic_file_llseek, \
+}
+
+static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct list_head *tmp;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each(tmp, &kctx->jit_active_head) {
+		data->active_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_pool_head) {
+		data->pool_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_destroy_head) {
+		data->destroy_value++;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops,
+		kbase_jit_debugfs_count_get);
+
+static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->nr_pages;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops,
+		kbase_jit_debugfs_vm_get);
+
+static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->gpu_alloc->nents;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops,
+		kbase_jit_debugfs_phys_get);
+
+void kbase_jit_debugfs_init(struct kbase_context *kctx)
+{
+	/* Debugfs entry for getting the number of JIT allocations. */
+	debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_count_fops);
+
+	/*
+	 * Debugfs entry for getting the total number of virtual pages
+	 * used by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_vm_fops);
+
+	/*
+	 * Debugfs entry for getting the number of physical pages used
+	 * by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_phys_fops);
+}
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations
+ * @work: Work item
+ *
+ * This function does the work of freeing JIT allocations whose physical
+ * backing has been released.
+ */
+static void kbase_jit_destroy_worker(struct work_struct *work)
+{
+	struct kbase_context *kctx;
+	struct kbase_va_region *reg;
+
+	kctx = container_of(work, struct kbase_context, jit_work);
+	do {
+		mutex_lock(&kctx->jit_evict_lock);
+		if (list_empty(&kctx->jit_destroy_head)) {
+			mutex_unlock(&kctx->jit_evict_lock);
+			break;
+		}
+
+		reg = list_first_entry(&kctx->jit_destroy_head,
+				struct kbase_va_region, jit_node);
+
+		list_del(&reg->jit_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		kbase_gpu_vm_lock(kctx);
+		reg->flags &= ~KBASE_REG_JIT;
+		kbase_mem_free_region(kctx, reg);
+		kbase_gpu_vm_unlock(kctx);
+	} while (1);
+}
+
+int kbase_jit_init(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->jit_evict_lock);
+	INIT_LIST_HEAD(&kctx->jit_active_head);
+	INIT_LIST_HEAD(&kctx->jit_pool_head);
+	INIT_LIST_HEAD(&kctx->jit_destroy_head);
+	INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
+
+	INIT_LIST_HEAD(&kctx->jit_pending_alloc);
+	INIT_LIST_HEAD(&kctx->jit_atoms_head);
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	kctx->jit_max_allocations = 0;
+	kctx->jit_current_allocations = 0;
+	kctx->trim_level = 0;
+
+	return 0;
+}
+
+/* Check if the allocation from JIT pool is of the same size as the new JIT
+ * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets
+ * the alignment requirements.
+ */
+static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kctx,
+	struct kbase_va_region *walker, struct base_jit_alloc_info *info)
+{
+	bool meet_reqs = true;
+
+	if (walker->nr_pages != info->va_pages)
+		meet_reqs = false;
+	else if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) {
+		size_t align = info->extent;
+		size_t align_mask = align - 1;
+
+		if ((walker->start_pfn + info->commit_pages) & align_mask)
+			meet_reqs = false;
+	}
+
+	return meet_reqs;
+}
+
+static int kbase_jit_grow(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info, struct kbase_va_region *reg)
+{
+	size_t delta;
+	size_t pages_required;
+	size_t old_size;
+	struct kbase_mem_pool *pool;
+	int ret = -ENOMEM;
+	struct tagged_addr *gpu_pages;
+	struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
+	int i;
+
+	if (info->commit_pages > reg->nr_pages) {
+		/* Attempted to grow larger than maximum size */
+		return -EINVAL;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Make the physical backing no longer reclaimable */
+	if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
+		goto update_failed;
+
+	if (reg->gpu_alloc->nents >= info->commit_pages)
+		goto done;
+
+	/* Grow the backing */
+	old_size = reg->gpu_alloc->nents;
+
+	/* Allocate some more pages */
+	delta = info->commit_pages - reg->gpu_alloc->nents;
+	pages_required = delta;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	/* Preallocate memory for the sub-allocation structs */
+	for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
+		prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]),
+				GFP_KERNEL);
+		if (!prealloc_sas[i])
+			goto update_failed;
+	}
+
+	if (pages_required >= (SZ_2M / SZ_4K)) {
+		pool = &kctx->lp_mem_pool;
+		/* Round up to number of 2 MB pages required */
+		pages_required += ((SZ_2M / SZ_4K) - 1);
+		pages_required /= (SZ_2M / SZ_4K);
+	} else {
+#endif
+		pool = &kctx->mem_pool;
+#ifdef CONFIG_MALI_2MB_ALLOC
+	}
+#endif
+
+	if (reg->cpu_alloc != reg->gpu_alloc)
+		pages_required *= 2;
+
+	spin_lock(&kctx->mem_partials_lock);
+	kbase_mem_pool_lock(pool);
+
+	/* As we can not allocate memory from the kernel with the vm_lock held,
+	 * grow the pool to the required size with the lock dropped. We hold the
+	 * pool lock to prevent another thread from allocating from the pool
+	 * between the grow and allocation.
+	 */
+	while (kbase_mem_pool_size(pool) < pages_required) {
+		int pool_delta = pages_required - kbase_mem_pool_size(pool);
+
+		kbase_mem_pool_unlock(pool);
+		spin_unlock(&kctx->mem_partials_lock);
+		kbase_gpu_vm_unlock(kctx);
+
+		if (kbase_mem_pool_grow(pool, pool_delta))
+			goto update_failed_unlocked;
+
+		kbase_gpu_vm_lock(kctx);
+		spin_lock(&kctx->mem_partials_lock);
+		kbase_mem_pool_lock(pool);
+	}
+
+	gpu_pages = kbase_alloc_phy_pages_helper_locked(reg->gpu_alloc, pool,
+			delta, &prealloc_sas[0]);
+	if (!gpu_pages) {
+		kbase_mem_pool_unlock(pool);
+		spin_unlock(&kctx->mem_partials_lock);
+		goto update_failed;
+	}
+
+	if (reg->cpu_alloc != reg->gpu_alloc) {
+		struct tagged_addr *cpu_pages;
+
+		cpu_pages = kbase_alloc_phy_pages_helper_locked(reg->cpu_alloc,
+				pool, delta, &prealloc_sas[1]);
+		if (!cpu_pages) {
+			kbase_free_phy_pages_helper_locked(reg->gpu_alloc,
+					pool, gpu_pages, delta);
+			kbase_mem_pool_unlock(pool);
+			spin_unlock(&kctx->mem_partials_lock);
+			goto update_failed;
+		}
+	}
+	kbase_mem_pool_unlock(pool);
+	spin_unlock(&kctx->mem_partials_lock);
+
+	ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages,
+			old_size);
+	/*
+	 * The grow failed so put the allocation back in the
+	 * pool and return failure.
+	 */
+	if (ret)
+		goto update_failed;
+
+done:
+	ret = 0;
+
+	/* Update attributes of JIT allocation taken from the pool */
+	reg->initial_commit = info->commit_pages;
+	reg->extent = info->extent;
+
+update_failed:
+	kbase_gpu_vm_unlock(kctx);
+update_failed_unlocked:
+	for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
+		kfree(prealloc_sas[i]);
+
+	return ret;
+}
+
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info)
+{
+	struct kbase_va_region *reg = NULL;
+
+	if (kctx->jit_current_allocations >= kctx->jit_max_allocations) {
+		/* Too many current allocations */
+		return NULL;
+	}
+	if (info->max_allocations > 0 &&
+			kctx->jit_current_allocations_per_bin[info->bin_id] >=
+			info->max_allocations) {
+		/* Too many current allocations in this bin */
+		return NULL;
+	}
+
+	mutex_lock(&kctx->jit_evict_lock);
+
+	/*
+	 * Scan the pool for an existing allocation which meets our
+	 * requirements and remove it.
+	 */
+	if (info->usage_id != 0) {
+		/* First scan for an allocation with the same usage ID */
+		struct kbase_va_region *walker;
+		struct kbase_va_region *temp;
+		size_t current_diff = SIZE_MAX;
+
+		list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head,
+				jit_node) {
+
+			if (walker->jit_usage_id == info->usage_id &&
+					walker->jit_bin_id == info->bin_id &&
+					meet_size_and_tiler_align_top_requirements(
+							kctx, walker, info)) {
+				size_t min_size, max_size, diff;
+
+				/*
+				 * The JIT allocations VA requirements have been
+				 * met, it's suitable but other allocations
+				 * might be a better fit.
+				 */
+				min_size = min_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				max_size = max_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				diff = max_size - min_size;
+
+				if (current_diff > diff) {
+					current_diff = diff;
+					reg = walker;
+				}
+
+				/* The allocation is an exact match */
+				if (current_diff == 0)
+					break;
+			}
+		}
+	}
+
+	if (!reg) {
+		/* No allocation with the same usage ID, or usage IDs not in
+		 * use. Search for an allocation we can reuse.
+		 */
+		struct kbase_va_region *walker;
+		struct kbase_va_region *temp;
+		size_t current_diff = SIZE_MAX;
+
+		list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head,
+				jit_node) {
+
+			if (walker->jit_bin_id == info->bin_id &&
+					meet_size_and_tiler_align_top_requirements(
+							kctx, walker, info)) {
+				size_t min_size, max_size, diff;
+
+				/*
+				 * The JIT allocations VA requirements have been
+				 * met, it's suitable but other allocations
+				 * might be a better fit.
+				 */
+				min_size = min_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				max_size = max_t(size_t,
+						walker->gpu_alloc->nents,
+						info->commit_pages);
+				diff = max_size - min_size;
+
+				if (current_diff > diff) {
+					current_diff = diff;
+					reg = walker;
+				}
+
+				/* The allocation is an exact match, so stop
+				 * looking.
+				 */
+				if (current_diff == 0)
+					break;
+			}
+		}
+	}
+
+	if (reg) {
+		/*
+		 * Remove the found region from the pool and add it to the
+		 * active list.
+		 */
+		list_move(&reg->jit_node, &kctx->jit_active_head);
+
+		/*
+		 * Remove the allocation from the eviction list as it's no
+		 * longer eligible for eviction. This must be done before
+		 * dropping the jit_evict_lock
+		 */
+		list_del_init(&reg->gpu_alloc->evict_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		if (kbase_jit_grow(kctx, info, reg) < 0) {
+			/*
+			 * An update to an allocation from the pool failed,
+			 * chances are slim a new allocation would fair any
+			 * better so return the allocation to the pool and
+			 * return the function with failure.
+			 */
+			goto update_failed_unlocked;
+		}
+	} else {
+		/* No suitable JIT allocation was found so create a new one */
+		u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
+				BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
+				BASE_MEM_COHERENT_LOCAL;
+		u64 gpu_addr;
+
+		mutex_unlock(&kctx->jit_evict_lock);
+
+		if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)
+			flags |= BASE_MEM_TILER_ALIGN_TOP;
+
+		reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
+				info->extent, &flags, &gpu_addr);
+		if (!reg)
+			goto out_unlocked;
+
+		reg->flags |= KBASE_REG_JIT;
+
+		mutex_lock(&kctx->jit_evict_lock);
+		list_add(&reg->jit_node, &kctx->jit_active_head);
+		mutex_unlock(&kctx->jit_evict_lock);
+	}
+
+	kctx->jit_current_allocations++;
+	kctx->jit_current_allocations_per_bin[info->bin_id]++;
+
+	reg->jit_usage_id = info->usage_id;
+	reg->jit_bin_id = info->bin_id;
+
+	return reg;
+
+update_failed_unlocked:
+	mutex_lock(&kctx->jit_evict_lock);
+	list_move(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_evict_lock);
+out_unlocked:
+	return NULL;
+}
+
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	u64 old_pages;
+
+	/* Get current size of JIT region */
+	old_pages = kbase_reg_current_backed_size(reg);
+	if (reg->initial_commit < old_pages) {
+		/* Free trim_level % of region, but don't go below initial
+		 * commit size
+		 */
+		u64 new_size = MAX(reg->initial_commit,
+			div_u64(old_pages * (100 - kctx->trim_level), 100));
+		u64 delta = old_pages - new_size;
+
+		if (delta) {
+			kbase_mem_shrink_cpu_mapping(kctx, reg, old_pages-delta,
+					old_pages);
+			kbase_mem_shrink_gpu_mapping(kctx, reg, old_pages-delta,
+					old_pages);
+
+			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				kbase_free_phy_pages_helper(reg->gpu_alloc,
+						delta);
+		}
+	}
+
+	kctx->jit_current_allocations--;
+	kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--;
+
+	kbase_mem_evictable_mark_reclaim(reg->gpu_alloc);
+
+	kbase_gpu_vm_lock(kctx);
+	reg->flags |= KBASE_REG_DONT_NEED;
+	kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents);
+	kbase_gpu_vm_unlock(kctx);
+
+	/*
+	 * Add the allocation to the eviction list and the jit pool, after this
+	 * point the shrink can reclaim it, or it may be reused.
+	 */
+	mutex_lock(&kctx->jit_evict_lock);
+
+	/* This allocation can't already be on a list. */
+	WARN_ON(!list_empty(&reg->gpu_alloc->evict_node));
+	list_add(&reg->gpu_alloc->evict_node, &kctx->evict_list);
+
+	list_move(&reg->jit_node, &kctx->jit_pool_head);
+
+	mutex_unlock(&kctx->jit_evict_lock);
+}
+
+void kbase_jit_backing_lost(struct kbase_va_region *reg)
+{
+	struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg);
+
+	if (WARN_ON(!kctx))
+		return;
+
+	lockdep_assert_held(&kctx->jit_evict_lock);
+
+	/*
+	 * JIT allocations will always be on a list, if the region
+	 * is not on a list then it's not a JIT allocation.
+	 */
+	if (list_empty(&reg->jit_node))
+		return;
+
+	/*
+	 * Freeing the allocation requires locks we might not be able
+	 * to take now, so move the allocation to the free list and kick
+	 * the worker which will do the freeing.
+	 */
+	list_move(&reg->jit_node, &kctx->jit_destroy_head);
+
+	schedule_work(&kctx->jit_work);
+}
+
+bool kbase_jit_evict(struct kbase_context *kctx)
+{
+	struct kbase_va_region *reg = NULL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Free the oldest allocation from the pool */
+	mutex_lock(&kctx->jit_evict_lock);
+	if (!list_empty(&kctx->jit_pool_head)) {
+		reg = list_entry(kctx->jit_pool_head.prev,
+				struct kbase_va_region, jit_node);
+		list_del(&reg->jit_node);
+		list_del_init(&reg->gpu_alloc->evict_node);
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	if (reg) {
+		reg->flags &= ~KBASE_REG_JIT;
+		kbase_mem_free_region(kctx, reg);
+	}
+
+	return (reg != NULL);
+}
+
+void kbase_jit_term(struct kbase_context *kctx)
+{
+	struct kbase_va_region *walker;
+
+	/* Free all allocations for this context */
+
+	kbase_gpu_vm_lock(kctx);
+	mutex_lock(&kctx->jit_evict_lock);
+	/* Free all allocations from the pool */
+	while (!list_empty(&kctx->jit_pool_head)) {
+		walker = list_first_entry(&kctx->jit_pool_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		list_del_init(&walker->gpu_alloc->evict_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+		walker->flags &= ~KBASE_REG_JIT;
+		kbase_mem_free_region(kctx, walker);
+		mutex_lock(&kctx->jit_evict_lock);
+	}
+
+	/* Free all allocations from active list */
+	while (!list_empty(&kctx->jit_active_head)) {
+		walker = list_first_entry(&kctx->jit_active_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		list_del_init(&walker->gpu_alloc->evict_node);
+		mutex_unlock(&kctx->jit_evict_lock);
+		walker->flags &= ~KBASE_REG_JIT;
+		kbase_mem_free_region(kctx, walker);
+		mutex_lock(&kctx->jit_evict_lock);
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+	kbase_gpu_vm_unlock(kctx);
+
+	/*
+	 * Flush the freeing of allocations whose backing has been freed
+	 * (i.e. everything in jit_destroy_head).
+	 */
+	cancel_work_sync(&kctx->jit_work);
+}
+
+static int kbase_jd_user_buf_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	long pinned_pages;
+	struct kbase_mem_phy_alloc *alloc;
+	struct page **pages;
+	struct tagged_addr *pa;
+	long i;
+	int err = -ENOMEM;
+	unsigned long address;
+	struct mm_struct *mm;
+	struct device *dev;
+	unsigned long offset;
+	unsigned long local_size;
+	unsigned long gwt_mask = ~0;
+
+	alloc = reg->gpu_alloc;
+	pa = kbase_get_gpu_phy_pages(reg);
+	address = alloc->imported.user_buf.address;
+	mm = alloc->imported.user_buf.mm;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+
+	pages = alloc->imported.user_buf.pages;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	pinned_pages = get_user_pages(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL);
+#else
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL, NULL);
+#endif
+
+	if (pinned_pages <= 0)
+		return pinned_pages;
+
+	if (pinned_pages != alloc->imported.user_buf.nr_pages) {
+		for (i = 0; i < pinned_pages; i++)
+			put_page(pages[i]);
+		return -ENOMEM;
+	}
+
+	dev = kctx->kbdev->dev;
+	offset = address & ~PAGE_MASK;
+	local_size = alloc->imported.user_buf.size;
+
+	for (i = 0; i < pinned_pages; i++) {
+		dma_addr_t dma_addr;
+		unsigned long min;
+
+		min = MIN(PAGE_SIZE - offset, local_size);
+		dma_addr = dma_map_page(dev, pages[i],
+				offset, min,
+				DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(dev, dma_addr))
+			goto unwind;
+
+		alloc->imported.user_buf.dma_addrs[i] = dma_addr;
+		pa[i] = as_tagged(page_to_phys(pages[i]));
+
+		local_size -= min;
+		offset = 0;
+	}
+
+	alloc->nents = pinned_pages;
+#ifdef CONFIG_MALI_JOB_DUMP
+	if (kctx->gwt_enabled)
+		gwt_mask = ~KBASE_REG_GPU_WR;
+#endif
+
+	err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+			pa, kbase_reg_current_backed_size(reg),
+			reg->flags & gwt_mask, kctx->as_nr);
+	if (err == 0)
+		return 0;
+
+	alloc->nents = 0;
+	/* fall down */
+unwind:
+	while (i--) {
+		dma_unmap_page(kctx->kbdev->dev,
+				alloc->imported.user_buf.dma_addrs[i],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
+
+	while (++i < pinned_pages) {
+		put_page(pages[i]);
+		pages[i] = NULL;
+	}
+
+	return err;
+}
+
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc, bool writeable)
+{
+	long i;
+	struct page **pages;
+	unsigned long size = alloc->imported.user_buf.size;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	pages = alloc->imported.user_buf.pages;
+	for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
+		unsigned long local_size;
+		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+
+		local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
+		dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+				DMA_BIDIRECTIONAL);
+		if (writeable)
+			set_page_dirty_lock(pages[i]);
+		put_page(pages[i]);
+		pages[i] = NULL;
+
+		size -= local_size;
+	}
+	alloc->nents = 0;
+}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int kbase_jd_umm_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	struct sg_table *sgt;
+	struct scatterlist *s;
+	int i;
+	struct tagged_addr *pa;
+	int err;
+	size_t count = 0;
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long gwt_mask = ~0;
+
+	alloc = reg->gpu_alloc;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM);
+	KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt);
+	sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment,
+			DMA_BIDIRECTIONAL);
+
+	if (IS_ERR_OR_NULL(sgt))
+		return -EINVAL;
+
+	/* save for later */
+	alloc->imported.umm.sgt = sgt;
+
+	pa = kbase_get_gpu_phy_pages(reg);
+	KBASE_DEBUG_ASSERT(pa);
+
+	for_each_sg(sgt->sgl, s, sgt->nents, i) {
+		size_t j, pages = PFN_UP(sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
+		"sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
+		sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
+		"sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
+		(unsigned long long) sg_dma_address(s));
+
+		for (j = 0; (j < pages) && (count < reg->nr_pages); j++,
+				count++)
+			*pa++ = as_tagged(sg_dma_address(s) +
+				(j << PAGE_SHIFT));
+		WARN_ONCE(j < pages,
+			  "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+		alloc->imported.umm.dma_buf->size);
+	}
+
+	if (!(reg->flags & KBASE_REG_IMPORT_PAD) &&
+			WARN_ONCE(count < reg->nr_pages,
+			"sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
+			alloc->imported.umm.dma_buf->size)) {
+		err = -EINVAL;
+		goto err_unmap_attachment;
+	}
+
+	/* Update nents as we now have pages to map */
+	alloc->nents = reg->nr_pages;
+
+#ifdef CONFIG_MALI_JOB_DUMP
+	if (kctx->gwt_enabled)
+		gwt_mask = ~KBASE_REG_GPU_WR;
+#endif
+
+	err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+			kbase_get_gpu_phy_pages(reg),
+			count,
+			(reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD) &
+			 gwt_mask,
+			kctx->as_nr);
+	if (err)
+		goto err_unmap_attachment;
+
+	if (reg->flags & KBASE_REG_IMPORT_PAD) {
+		err = kbase_mmu_insert_single_page(kctx,
+				reg->start_pfn + count,
+				kctx->aliasing_sink_page,
+				reg->nr_pages - count,
+				(reg->flags | KBASE_REG_GPU_RD) &
+				~KBASE_REG_GPU_WR);
+		if (err)
+			goto err_teardown_orig_pages;
+	}
+
+	return 0;
+
+err_teardown_orig_pages:
+	kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+			count, kctx->as_nr);
+err_unmap_attachment:
+	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+			alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+	alloc->imported.umm.sgt = NULL;
+
+	return err;
+}
+
+static void kbase_jd_umm_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(alloc);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt);
+	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+	    alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+	alloc->imported.umm.sgt = NULL;
+	alloc->nents = 0;
+}
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm)
+{
+	int err;
+
+	/* decide what needs to happen for this resource */
+	switch (reg->gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		if (reg->gpu_alloc->imported.user_buf.mm != locked_mm)
+			goto exit;
+
+		reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) {
+			err = kbase_jd_user_buf_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+	}
+	break;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		reg->gpu_alloc->imported.umm.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
+			err = kbase_jd_umm_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.umm.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+		break;
+	}
+#endif
+	default:
+		goto exit;
+	}
+
+	return kbase_mem_phy_alloc_get(reg->gpu_alloc);
+exit:
+	return NULL;
+}
+
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
+{
+	switch (alloc->type) {
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		alloc->imported.umm.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.umm.current_mapping_usage_count) {
+			if (reg && reg->gpu_alloc == alloc) {
+				int err;
+
+				err = kbase_mmu_teardown_pages(
+						kctx->kbdev,
+						&kctx->mmu,
+						reg->start_pfn,
+						alloc->nents,
+						kctx->as_nr);
+				WARN_ON(err);
+			}
+
+			kbase_jd_umm_unmap(kctx, alloc);
+		}
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		alloc->imported.user_buf.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.user_buf.current_mapping_usage_count) {
+			bool writeable = true;
+
+			if (reg && reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx->kbdev,
+						&kctx->mmu,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg),
+						kctx->as_nr);
+
+			if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
+				writeable = false;
+
+			kbase_jd_user_buf_unmap(kctx, alloc, writeable);
+		}
+	}
+	break;
+	default:
+	break;
+	}
+	kbase_mem_phy_alloc_put(alloc);
+}
+
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *meta = NULL;
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Walk the per context external resource metadata list for the
+	 * metadata which matches the region which is being acquired.
+	 */
+	list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) {
+		if (walker->gpu_addr == gpu_addr) {
+			meta = walker;
+			break;
+		}
+	}
+
+	/* No metadata exists so create one. */
+	if (!meta) {
+		struct kbase_va_region *reg;
+
+		/* Find the region */
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				kctx, gpu_addr);
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE))
+			goto failed;
+
+		/* Allocate the metadata object */
+		meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+		if (!meta)
+			goto failed;
+
+		/*
+		 * Fill in the metadata object and acquire a reference
+		 * for the physical resource.
+		 */
+		meta->alloc = kbase_map_external_resource(kctx, reg, NULL);
+
+		if (!meta->alloc)
+			goto fail_map;
+
+		meta->gpu_addr = reg->start_pfn << PAGE_SHIFT;
+
+		list_add(&meta->ext_res_node, &kctx->ext_res_meta_head);
+	}
+
+	return meta;
+
+fail_map:
+	kfree(meta);
+failed:
+	return NULL;
+}
+
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+	struct kbase_va_region *reg;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Search of the metadata if one isn't provided. */
+	if (!meta) {
+		/*
+		 * Walk the per context external resource metadata list for the
+		 * metadata which matches the region which is being released.
+		 */
+		list_for_each_entry(walker, &kctx->ext_res_meta_head,
+				ext_res_node) {
+			if (walker->gpu_addr == gpu_addr) {
+				meta = walker;
+				break;
+			}
+		}
+	}
+
+	/* No metadata so just return. */
+	if (!meta)
+		return false;
+
+	/* Drop the physical memory reference and free the metadata. */
+	reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx,
+			meta->gpu_addr);
+
+	kbase_unmap_external_resource(kctx, reg, meta->alloc);
+	list_del(&meta->ext_res_node);
+	kfree(meta);
+
+	return true;
+}
+
+int kbase_sticky_resource_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->ext_res_meta_head);
+
+	return 0;
+}
+
+void kbase_sticky_resource_term(struct kbase_context *kctx)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Free any sticky resources which haven't been unmapped.
+	 *
+	 * Note:
+	 * We don't care about refcounts at this point as no future
+	 * references to the meta data will be made.
+	 * Region termination would find these if we didn't free them
+	 * here, but it's more efficient if we do the clean up here.
+	 */
+	while (!list_empty(&kctx->ext_res_meta_head)) {
+		walker = list_first_entry(&kctx->ext_res_meta_head,
+				struct kbase_ctx_ext_res_meta, ext_res_node);
+
+		kbase_sticky_resource_release(kctx, walker, 0);
+	}
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h
new file mode 100644
index 0000000000000..901f1cf5cc6c6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h
@@ -0,0 +1,1434 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem.h
+ * Base kernel memory APIs
+ */
+
+#ifndef _KBASE_MEM_H_
+#define _KBASE_MEM_H_
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/kref.h>
+#include "mali_base_kernel.h"
+#include <mali_kbase_hw.h>
+#include "mali_kbase_pm.h"
+#include "mali_kbase_defs.h"
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include "mali_kbase_gator.h"
+#endif
+/* Required for kbase_mem_evictable_unmake */
+#include "mali_kbase_mem_linux.h"
+
+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx,
+		int pages);
+
+/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2)	/* round to 4 pages */
+
+/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages.
+The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and
+page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table
+updates and generates duplicate page faults as the page table information used by the MMU is not valid.   */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3)	/* round to 8 pages */
+
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0)	/* round to 1 page */
+
+/* This must always be a power of 2 */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630)
+/**
+ * A CPU mapping
+ */
+struct kbase_cpu_mapping {
+	struct   list_head mappings_list;
+	struct   kbase_mem_phy_alloc *alloc;
+	struct   kbase_context *kctx;
+	struct   kbase_va_region *region;
+	int      count;
+	int      free_on_close;
+};
+
+enum kbase_memory_type {
+	KBASE_MEM_TYPE_NATIVE,
+	KBASE_MEM_TYPE_IMPORTED_UMM,
+	KBASE_MEM_TYPE_IMPORTED_USER_BUF,
+	KBASE_MEM_TYPE_ALIAS,
+	KBASE_MEM_TYPE_RAW
+};
+
+/* internal structure, mirroring base_mem_aliasing_info,
+ * but with alloc instead of a gpu va (handle) */
+struct kbase_aliased {
+	struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */
+	u64 offset; /* in pages */
+	u64 length; /* in pages */
+};
+
+/**
+ * @brief Physical pages tracking object properties
+  */
+#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED  (1ul << 0)
+#define KBASE_MEM_PHY_ALLOC_LARGE            (1ul << 1)
+
+/* physical pages tracking object.
+ * Set up to track N pages.
+ * N not stored here, the creator holds that info.
+ * This object only tracks how many elements are actually valid (present).
+ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not
+ * shared with another region or client. CPU mappings are OK to exist when changing, as
+ * long as the tracked mappings objects are updated as part of the change.
+ */
+struct kbase_mem_phy_alloc {
+	struct kref           kref; /* number of users of this alloc */
+	atomic_t              gpu_mappings;
+	size_t                nents; /* 0..N */
+	struct tagged_addr    *pages; /* N elements, only 0..nents are valid */
+
+	/* kbase_cpu_mappings */
+	struct list_head      mappings;
+
+	/* Node used to store this allocation on the eviction list */
+	struct list_head      evict_node;
+	/* Physical backing size when the pages where evicted */
+	size_t                evicted;
+	/*
+	 * Back reference to the region structure which created this
+	 * allocation, or NULL if it has been freed.
+	 */
+	struct kbase_va_region *reg;
+
+	/* type of buffer */
+	enum kbase_memory_type type;
+
+	/* Kernel side mapping of the alloc */
+	struct kbase_vmap_struct *permanent_map;
+
+	unsigned long properties;
+
+	/* member in union valid based on @a type */
+	union {
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+		struct {
+			struct dma_buf *dma_buf;
+			struct dma_buf_attachment *dma_attachment;
+			unsigned int current_mapping_usage_count;
+			struct sg_table *sgt;
+		} umm;
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+		struct {
+			u64 stride;
+			size_t nents;
+			struct kbase_aliased *aliased;
+		} alias;
+		struct {
+			struct kbase_context *kctx;
+			/* Number of pages in this structure, including *pages.
+			 * Used for kernel memory tracking.
+			 */
+			size_t nr_struct_pages;
+		} native;
+		struct kbase_alloc_import_user_buf {
+			unsigned long address;
+			unsigned long size;
+			unsigned long nr_pages;
+			struct page **pages;
+			/* top bit (1<<31) of current_mapping_usage_count
+			 * specifies that this import was pinned on import
+			 * See PINNED_ON_IMPORT
+			 */
+			u32 current_mapping_usage_count;
+			struct mm_struct *mm;
+			dma_addr_t *dma_addrs;
+		} user_buf;
+	} imported;
+};
+
+/* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is
+ * used to signify that a buffer was pinned when it was imported. Since the
+ * reference count is limited by the number of atoms that can be submitted at
+ * once there should be no danger of overflowing into this bit.
+ * Stealing the top bit also has the benefit that
+ * current_mapping_usage_count != 0 if and only if the buffer is mapped.
+ */
+#define PINNED_ON_IMPORT	(1<<31)
+
+static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		atomic_inc(&alloc->gpu_mappings);
+}
+
+static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(alloc);
+	/* we only track mappings of NATIVE buffers */
+	if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+		if (0 > atomic_dec_return(&alloc->gpu_mappings)) {
+			pr_err("Mismatched %s:\n", __func__);
+			dump_stack();
+		}
+}
+
+/**
+ * kbase_mem_is_imported - Indicate whether a memory type is imported
+ *
+ * @type: the memory type
+ *
+ * Return: true if the memory type is imported, false otherwise
+ */
+static inline bool kbase_mem_is_imported(enum kbase_memory_type type)
+{
+	return (type == KBASE_MEM_TYPE_IMPORTED_UMM) ||
+		(type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+}
+
+void kbase_mem_kref_free(struct kref *kref);
+
+int kbase_mem_init(struct kbase_device *kbdev);
+void kbase_mem_halt(struct kbase_device *kbdev);
+void kbase_mem_term(struct kbase_device *kbdev);
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_get(&alloc->kref);
+	return alloc;
+}
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc)
+{
+	kref_put(&alloc->kref, kbase_mem_kref_free);
+	return NULL;
+}
+
+/**
+ * A GPU memory region, and attributes for CPU mappings.
+ */
+struct kbase_va_region {
+	struct rb_node rblink;
+	struct list_head link;
+
+	struct rb_root *rbtree;	/* Backlink to rb tree */
+
+	u64 start_pfn;		/* The PFN in GPU space */
+	size_t nr_pages;
+	/* Initial commit, for aligning the start address and correctly growing
+	 * KBASE_REG_TILER_ALIGN_TOP regions */
+	size_t initial_commit;
+
+/* Free region */
+#define KBASE_REG_FREE              (1ul << 0)
+/* CPU write access */
+#define KBASE_REG_CPU_WR            (1ul << 1)
+/* GPU write access */
+#define KBASE_REG_GPU_WR            (1ul << 2)
+/* No eXecute flag */
+#define KBASE_REG_GPU_NX            (1ul << 3)
+/* Is CPU cached? */
+#define KBASE_REG_CPU_CACHED        (1ul << 4)
+/* Is GPU cached?
+ * Some components within the GPU might only be able to access memory that is
+ * GPU cacheable. Refer to the specific GPU implementation for more details.
+ */
+#define KBASE_REG_GPU_CACHED        (1ul << 5)
+
+#define KBASE_REG_GROWABLE          (1ul << 6)
+/* Can grow on pf? */
+#define KBASE_REG_PF_GROW           (1ul << 7)
+
+/* Allocation doesn't straddle the 4GB boundary in GPU virtual space */
+#define KBASE_REG_GPU_VA_SAME_4GB_PAGE (1ul << 8)
+
+/* inner shareable coherency */
+#define KBASE_REG_SHARE_IN          (1ul << 9)
+/* inner & outer shareable coherency */
+#define KBASE_REG_SHARE_BOTH        (1ul << 10)
+
+/* Space for 4 different zones */
+#define KBASE_REG_ZONE_MASK         (3ul << 11)
+#define KBASE_REG_ZONE(x)           (((x) & 3) << 11)
+
+/* GPU read access */
+#define KBASE_REG_GPU_RD            (1ul<<13)
+/* CPU read access */
+#define KBASE_REG_CPU_RD            (1ul<<14)
+
+/* Index of chosen MEMATTR for this region (0..7) */
+#define KBASE_REG_MEMATTR_MASK      (7ul << 16)
+#define KBASE_REG_MEMATTR_INDEX(x)  (((x) & 7) << 16)
+#define KBASE_REG_MEMATTR_VALUE(x)  (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
+
+#define KBASE_REG_SECURE            (1ul << 19)
+
+#define KBASE_REG_DONT_NEED         (1ul << 20)
+
+/* Imported buffer is padded? */
+#define KBASE_REG_IMPORT_PAD        (1ul << 21)
+
+/* Bit 22 is reserved.
+ *
+ * Do not remove, use the next unreserved bit for new flags */
+#define KBASE_REG_RESERVED_BIT_22   (1ul << 22)
+
+/* The top of the initial commit is aligned to extent pages.
+ * Extent must be a power of 2 */
+#define KBASE_REG_TILER_ALIGN_TOP   (1ul << 23)
+
+/* Memory is handled by JIT - user space should not be able to free it */
+#define KBASE_REG_JIT               (1ul << 24)
+
+/* Memory has permanent kernel side mapping */
+#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25)
+
+#define KBASE_REG_ZONE_SAME_VA      KBASE_REG_ZONE(0)
+
+/* only used with 32-bit clients */
+/*
+ * On a 32bit platform, custom VA should be wired from 4GB
+ * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface
+ * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference).
+ * So we put the default limit to the maximum possible on Linux and shrink
+ * it down, if required by the GPU, during initialization.
+ */
+
+#define KBASE_REG_ZONE_CUSTOM_VA         KBASE_REG_ZONE(1)
+#define KBASE_REG_ZONE_CUSTOM_VA_BASE    (0x100000000ULL >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE    (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
+/* end 32-bit clients only */
+
+
+	unsigned long flags;
+
+	size_t extent; /* nr of pages alloc'd on PF */
+
+	struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */
+	struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */
+
+	/* List head used to store the region in the JIT allocation pool */
+	struct list_head jit_node;
+	/* The last JIT usage ID for this region */
+	u16 jit_usage_id;
+	/* The JIT bin this allocation came from */
+	u8 jit_bin_id;
+};
+
+/* Common functions */
+static inline struct tagged_addr *kbase_get_cpu_phy_pages(
+		struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->pages;
+}
+
+static inline struct tagged_addr *kbase_get_gpu_phy_pages(
+		struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->gpu_alloc->pages;
+}
+
+static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	/* if no alloc object the backed size naturally is 0 */
+	if (!reg->cpu_alloc)
+		return 0;
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+	return reg->cpu_alloc->nents;
+}
+
+#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */
+
+static inline struct kbase_mem_phy_alloc *kbase_alloc_create(
+		struct kbase_context *kctx, size_t nr_pages,
+		enum kbase_memory_type type)
+{
+	struct kbase_mem_phy_alloc *alloc;
+	size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
+	size_t per_page_size = sizeof(*alloc->pages);
+
+	/* Imported pages may have page private data already in use */
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+		alloc_size += nr_pages *
+				sizeof(*alloc->imported.user_buf.dma_addrs);
+		per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs);
+	}
+
+	/*
+	 * Prevent nr_pages*per_page_size + sizeof(*alloc) from
+	 * wrapping around.
+	 */
+	if (nr_pages > ((((size_t) -1) - sizeof(*alloc))
+			/ per_page_size))
+		return ERR_PTR(-ENOMEM);
+
+	/* Allocate based on the size to reduce internal fragmentation of vmem */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc = vzalloc(alloc_size);
+	else
+		alloc = kzalloc(alloc_size, GFP_KERNEL);
+
+	if (!alloc)
+		return ERR_PTR(-ENOMEM);
+
+	if (type == KBASE_MEM_TYPE_NATIVE) {
+		alloc->imported.native.nr_struct_pages =
+				(alloc_size + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+		kbase_process_page_usage_inc(kctx,
+				alloc->imported.native.nr_struct_pages);
+	}
+
+	/* Store allocation method */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE;
+
+	kref_init(&alloc->kref);
+	atomic_set(&alloc->gpu_mappings, 0);
+	alloc->nents = 0;
+	alloc->pages = (void *)(alloc + 1);
+	INIT_LIST_HEAD(&alloc->mappings);
+	alloc->type = type;
+
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
+		alloc->imported.user_buf.dma_addrs =
+				(void *) (alloc->pages + nr_pages);
+
+	return alloc;
+}
+
+static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
+		struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(!reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(!reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE);
+
+	reg->cpu_alloc = kbase_alloc_create(kctx, reg->nr_pages,
+			KBASE_MEM_TYPE_NATIVE);
+	if (IS_ERR(reg->cpu_alloc))
+		return PTR_ERR(reg->cpu_alloc);
+	else if (!reg->cpu_alloc)
+		return -ENOMEM;
+
+	reg->cpu_alloc->imported.native.kctx = kctx;
+	if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE)
+	    && (reg->flags & KBASE_REG_CPU_CACHED)) {
+		reg->gpu_alloc = kbase_alloc_create(kctx, reg->nr_pages,
+				KBASE_MEM_TYPE_NATIVE);
+		if (IS_ERR_OR_NULL(reg->gpu_alloc)) {
+			kbase_mem_phy_alloc_put(reg->cpu_alloc);
+			return -ENOMEM;
+		}
+		reg->gpu_alloc->imported.native.kctx = kctx;
+	} else {
+		reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	}
+
+	mutex_lock(&kctx->jit_evict_lock);
+	INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
+	INIT_LIST_HEAD(&reg->gpu_alloc->evict_node);
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	reg->flags &= ~KBASE_REG_FREE;
+
+	return 0;
+}
+
+static inline u32 kbase_atomic_add_pages(u32 num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_add_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+static inline u32 kbase_atomic_sub_pages(u32 num_pages, atomic_t *used_pages)
+{
+	int new_val = atomic_sub_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+	return new_val;
+}
+
+/*
+ * Max size for kbdev memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT)
+
+/*
+ * Max size for kctx memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KCTX  (SZ_64M >> PAGE_SHIFT)
+
+/*
+ * The order required for a 2MB page allocation (2^order * 4KB = 2MB)
+ */
+#define KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER	9
+
+/*
+ * The order required for a 4KB page allocation
+ */
+#define KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER	0
+
+/**
+ * kbase_mem_pool_init - Create a memory pool for a kbase device
+ * @pool:      Memory pool to initialize
+ * @max_size:  Maximum number of free pages the pool can hold
+ * @order:     Page order for physical page size (order=0=>4kB, order=9=>2MB)
+ * @kbdev:     Kbase device where memory is used
+ * @next_pool: Pointer to the next pool or NULL.
+ *
+ * Allocations from @pool are in whole pages. Each @pool has a free list where
+ * pages can be quickly allocated from. The free list is initially empty and
+ * filled whenever pages are freed back to the pool. The number of free pages
+ * in the pool will in general not exceed @max_size, but the pool may in
+ * certain corner cases grow above @max_size.
+ *
+ * If @next_pool is not NULL, we will allocate from @next_pool before going to
+ * the kernel allocator. Similarily pages can spill over to @next_pool when
+ * @pool is full. Pages are zeroed before they spill over to another pool, to
+ * prevent leaking information between applications.
+ *
+ * A shrinker is registered so that Linux mm can reclaim pages from the pool as
+ * needed.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		size_t order,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool);
+
+/**
+ * kbase_mem_pool_term - Destroy a memory pool
+ * @pool:  Memory pool to destroy
+ *
+ * Pages in the pool will spill over to @next_pool (if available) or freed to
+ * the kernel.
+ */
+void kbase_mem_pool_term(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_alloc - Allocate a page from memory pool
+ * @pool:  Memory pool to allocate from
+ *
+ * Allocations from the pool are made as follows:
+ * 1. If there are free pages in the pool, allocate a page from @pool.
+ * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page
+ *    from @next_pool.
+ * 3. Return NULL if no memory in the pool
+ *
+ * Return: Pointer to allocated page, or NULL if allocation failed.
+ *
+ * Note : This function should not be used if the pool lock is held. Use
+ * kbase_mem_pool_alloc_locked() instead.
+ */
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_alloc_locked - Allocate a page from memory pool
+ * @pool:  Memory pool to allocate from
+ *
+ * If there are free pages in the pool, this function allocates a page from
+ * @pool. This function does not use @next_pool.
+ *
+ * Return: Pointer to allocated page, or NULL if allocation failed.
+ *
+ * Note : Caller must hold the pool lock.
+ */
+struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_pool_free - Free a page to memory pool
+ * @pool:  Memory pool where page should be freed
+ * @page:  Page to free to the pool
+ * @dirty: Whether some of the page may be dirty in the cache.
+ *
+ * Pages are freed to the pool as follows:
+ * 1. If @pool is not full, add @page to @pool.
+ * 2. Otherwise, if @next_pool is not NULL and not full, add @page to
+ *    @next_pool.
+ * 3. Finally, free @page to the kernel.
+ *
+ * Note : This function should not be used if the pool lock is held. Use
+ * kbase_mem_pool_free_locked() instead.
+ */
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page,
+		bool dirty);
+
+/**
+ * kbase_mem_pool_free_locked - Free a page to memory pool
+ * @pool:  Memory pool where page should be freed
+ * @p:     Page to free to the pool
+ * @dirty: Whether some of the page may be dirty in the cache.
+ *
+ * If @pool is not full, this function adds @page to @pool. Otherwise, @page is
+ * freed to the kernel. This function does not use @next_pool.
+ *
+ * Note : Caller must hold the pool lock.
+ */
+void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty);
+
+/**
+ * kbase_mem_pool_alloc_pages - Allocate pages from memory pool
+ * @pool:     Memory pool to allocate from
+ * @nr_pages: Number of pages to allocate
+ * @pages:    Pointer to array where the physical address of the allocated
+ *            pages will be stored.
+ * @partial_allowed: If fewer pages allocated is allowed
+ *
+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages.
+ *
+ * Return:
+ * On success number of pages allocated (could be less than nr_pages if
+ * partial_allowed).
+ * On error an error code.
+ *
+ * Note : This function should not be used if the pool lock is held. Use
+ * kbase_mem_pool_alloc_pages_locked() instead.
+ *
+ * The caller must not hold vm_lock, as this could cause a deadlock if
+ * the kernel OoM killer runs. If the caller must allocate pages while holding
+ * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
+ */
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		struct tagged_addr *pages, bool partial_allowed);
+
+/**
+ * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool
+ * @pool:        Memory pool to allocate from
+ * @nr_4k_pages: Number of pages to allocate
+ * @pages:       Pointer to array where the physical address of the allocated
+ *               pages will be stored.
+ *
+ * Like kbase_mem_pool_alloc() but optimized for allocating many pages. This
+ * version does not allocate new pages from the kernel, and therefore will never
+ * trigger the OoM killer. Therefore, it can be run while the vm_lock is held.
+ *
+ * As new pages can not be allocated, the caller must ensure there are
+ * sufficient pages in the pool. Usage of this function should look like :
+ *
+ *   kbase_gpu_vm_lock(kctx);
+ *   kbase_mem_pool_lock(pool)
+ *   while (kbase_mem_pool_size(pool) < pages_required) {
+ *     kbase_mem_pool_unlock(pool)
+ *     kbase_gpu_vm_unlock(kctx);
+ *     kbase_mem_pool_grow(pool)
+ *     kbase_gpu_vm_lock(kctx);
+ *     kbase_mem_pool_lock(pool)
+ *   }
+ *   kbase_mem_pool_alloc_pages_locked(pool)
+ *   kbase_mem_pool_unlock(pool)
+ *   Perform other processing that requires vm_lock...
+ *   kbase_gpu_vm_unlock(kctx);
+ *
+ * This ensures that the pool can be grown to the required size and that the
+ * allocation can complete without another thread using the newly grown pages.
+ *
+ * Return:
+ * On success number of pages allocated.
+ * On error an error code.
+ *
+ * Note : Caller must hold the pool lock.
+ */
+int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_4k_pages, struct tagged_addr *pages);
+
+/**
+ * kbase_mem_pool_free_pages - Free pages to memory pool
+ * @pool:     Memory pool where pages should be freed
+ * @nr_pages: Number of pages to free
+ * @pages:    Pointer to array holding the physical addresses of the pages to
+ *            free.
+ * @dirty:    Whether any pages may be dirty in the cache.
+ * @reclaimed: Whether the pages where reclaimable and thus should bypass
+ *             the pool and go straight to the kernel.
+ *
+ * Like kbase_mem_pool_free() but optimized for freeing many pages.
+ */
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		struct tagged_addr *pages, bool dirty, bool reclaimed);
+
+/**
+ * kbase_mem_pool_free_pages_locked - Free pages to memory pool
+ * @pool:     Memory pool where pages should be freed
+ * @nr_pages: Number of pages to free
+ * @pages:    Pointer to array holding the physical addresses of the pages to
+ *            free.
+ * @dirty:    Whether any pages may be dirty in the cache.
+ * @reclaimed: Whether the pages where reclaimable and thus should bypass
+ *             the pool and go straight to the kernel.
+ *
+ * Like kbase_mem_pool_free() but optimized for freeing many pages.
+ */
+void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_pages, struct tagged_addr *pages, bool dirty,
+		bool reclaimed);
+
+/**
+ * kbase_mem_pool_size - Get number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Note: the size of the pool may in certain corner cases exceed @max_size!
+ *
+ * Return: Number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool)
+{
+	return READ_ONCE(pool->cur_size);
+}
+
+/**
+ * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool
+ * @pool:  Memory pool to inspect
+ *
+ * Return: Maximum number of free pages in the pool
+ */
+static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool)
+{
+	return pool->max_size;
+}
+
+
+/**
+ * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool
+ * @pool:     Memory pool to inspect
+ * @max_size: Maximum number of free pages the pool can hold
+ *
+ * If @max_size is reduced, the pool will be shrunk to adhere to the new limit.
+ * For details see kbase_mem_pool_shrink().
+ */
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
+
+/**
+ * kbase_mem_pool_grow - Grow the pool
+ * @pool:       Memory pool to grow
+ * @nr_to_grow: Number of pages to add to the pool
+ *
+ * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
+ * become larger than the maximum size specified.
+ *
+ * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages
+ */
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
+
+/**
+ * kbase_mem_pool_trim - Grow or shrink the pool to a new size
+ * @pool:     Memory pool to trim
+ * @new_size: New number of pages in the pool
+ *
+ * If @new_size > @cur_size, fill the pool with new pages from the kernel, but
+ * not above the max_size for the pool.
+ * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel.
+ */
+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size);
+
+/**
+ * kbase_mem_pool_mark_dying - Mark that this pool is dying
+ * @pool:     Memory pool
+ *
+ * This will cause any ongoing allocation operations (eg growing on page fault)
+ * to be terminated.
+ */
+void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool);
+
+/**
+ * kbase_mem_alloc_page - Allocate a new page for a device
+ * @pool:  Memory pool to allocate a page from
+ *
+ * Most uses should use kbase_mem_pool_alloc to allocate a page. However that
+ * function can fail in the event the pool is empty.
+ *
+ * Return: A new page or NULL if no memory
+ */
+struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool);
+
+int kbase_region_tracker_init(struct kbase_context *kctx);
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
+		u8 max_allocations, u8 trim_level);
+void kbase_region_tracker_term(struct kbase_context *kctx);
+
+/**
+ * kbase_region_tracker_term_rbtree - Free memory for a region tracker
+ *
+ * This will free all the regions within the region tracker
+ *
+ * @rbtree: Region tracker tree root
+ */
+void kbase_region_tracker_term_rbtree(struct rb_root *rbtree);
+
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(
+		struct kbase_context *kctx, u64 gpu_addr);
+struct kbase_va_region *kbase_find_region_enclosing_address(
+		struct rb_root *rbtree, u64 gpu_addr);
+
+/**
+ * @brief Check that a pointer is actually a valid region.
+ *
+ * Must be called with context lock held.
+ */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(
+		struct kbase_context *kctx, u64 gpu_addr);
+struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree,
+		u64 gpu_addr);
+
+struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
+		u64 start_pfn, size_t nr_pages, int zone);
+void kbase_free_alloced_region(struct kbase_va_region *reg);
+int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg,
+		u64 addr, size_t nr_pages, size_t align);
+int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
+		struct kbase_va_region *reg, u64 addr, size_t nr_pages,
+		size_t align);
+int kbase_remove_va_region(struct kbase_va_region *reg);
+
+bool kbase_check_alloc_flags(unsigned long flags);
+bool kbase_check_import_flags(unsigned long flags);
+
+/**
+ * kbase_check_alloc_sizes - check user space sizes parameters for an
+ *                           allocation
+ *
+ * @kctx:         kbase context
+ * @flags:        The flags passed from user space
+ * @va_pages:     The size of the requested region, in pages.
+ * @commit_pages: Number of pages to commit initially.
+ * @extent:       Number of pages to grow by on GPU page fault and/or alignment
+ *                (depending on flags)
+ *
+ * Makes checks on the size parameters passed in from user space for a memory
+ * allocation call, with respect to the flags requested.
+ *
+ * Return: 0 if sizes are valid for these flags, negative error code otherwise
+ */
+int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
+		u64 va_pages, u64 commit_pages, u64 extent);
+
+/**
+ * kbase_update_region_flags - Convert user space flags to kernel region flags
+ *
+ * @kctx:  kbase context
+ * @reg:   The region to update the flags on
+ * @flags: The flags passed from user space
+ *
+ * The user space flag BASE_MEM_COHERENT_SYSTEM_REQUIRED will be rejected and
+ * this function will fail if the system does not support system coherency.
+ *
+ * Return: 0 if successful, -EINVAL if the flags are not supported
+ */
+int kbase_update_region_flags(struct kbase_context *kctx,
+		struct kbase_va_region *reg, unsigned long flags);
+
+void kbase_gpu_vm_lock(struct kbase_context *kctx);
+void kbase_gpu_vm_unlock(struct kbase_context *kctx);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
+
+/**
+ * kbase_mmu_init - Initialise an object representing GPU page tables
+ *
+ * The structure should be terminated using kbase_mmu_term()
+ *
+ * @kbdev: kbase device
+ * @mmut:  structure to initialise
+ * @kctx:  optional kbase context, may be NULL if this set of MMU tables is not
+ *         associated with a context
+ */
+int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+		struct kbase_context *kctx);
+/**
+ * kbase_mmu_term - Terminate an object representing GPU page tables
+ *
+ * This will free any page tables that have been allocated
+ *
+ * @kbdev: kbase device
+ * @mmut:  kbase_mmu_table to be destroyed
+ */
+void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut);
+
+int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
+				    struct kbase_mmu_table *mmut,
+				    const u64 start_vpfn,
+				    struct tagged_addr *phys, size_t nr,
+				    unsigned long flags);
+int kbase_mmu_insert_pages(struct kbase_device *kbdev,
+			   struct kbase_mmu_table *mmut, u64 vpfn,
+			   struct tagged_addr *phys, size_t nr,
+			   unsigned long flags, int as_nr);
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					struct tagged_addr phys, size_t nr,
+					unsigned long flags);
+
+int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
+			     struct kbase_mmu_table *mmut, u64 vpfn,
+			     size_t nr, int as_nr);
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
+			   struct tagged_addr *phys, size_t nr,
+			   unsigned long flags);
+
+/**
+ * @brief Register region and map it on the GPU.
+ *
+ * Call kbase_add_va_region() and map the region on the GPU.
+ */
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+/**
+ * @brief Remove the region from the GPU and unregister it.
+ *
+ * Must be called with context lock held.
+ */
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * kbase_mmu_update - Configure an address space on the GPU to the specified
+ *                    MMU tables
+ *
+ * The caller has the following locking conditions:
+ * - It must hold kbase_device->mmu_hw_mutex
+ * - It must hold the hwaccess_lock
+ *
+ * @kbdev: Kbase device structure
+ * @mmut:  The set of MMU tables to be configured on the address space
+ * @as_nr: The address space to be configured
+ */
+void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+		int as_nr);
+
+/**
+ * kbase_mmu_disable() - Disable the MMU for a previously active kbase context.
+ * @kctx:	Kbase context
+ *
+ * Disable and perform the required cache maintenance to remove the all
+ * data from provided kbase context from the GPU caches.
+ *
+ * The caller has the following locking conditions:
+ * - It must hold kbase_device->mmu_hw_mutex
+ * - It must hold the hwaccess_lock
+ */
+void kbase_mmu_disable(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified
+ * address space.
+ * @kbdev:	Kbase device
+ * @as_nr:	The address space number to set to unmapped.
+ *
+ * This function must only be called during reset/power-up and it used to
+ * ensure the registers are in a known state.
+ *
+ * The caller must hold kbdev->mmu_hw_mutex.
+ */
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr);
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+/** Dump the MMU tables to a buffer
+ *
+ * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the
+ * buffer is too small then the return value will be NULL.
+ *
+ * The GPU vm lock must be held when calling this function.
+ *
+ * The buffer returned should be freed with @ref vfree when it is no longer required.
+ *
+ * @param[in]   kctx        The kbase context to dump
+ * @param[in]   nr_pages    The number of pages to allocate for the buffer.
+ *
+ * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too
+ * small)
+ */
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages);
+
+/**
+ * kbase_sync_now - Perform cache maintenance on a memory region
+ *
+ * @kctx: The kbase context of the region
+ * @sset: A syncset structure describing the region and direction of the
+ *        synchronisation required
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset);
+void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr cpu_pa,
+		struct tagged_addr gpu_pa, off_t offset, size_t size,
+		enum kbase_sync_type sync_fn);
+void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+
+/* OS specific functions */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr);
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg);
+void kbase_os_mem_map_lock(struct kbase_context *kctx);
+void kbase_os_mem_map_unlock(struct kbase_context *kctx);
+
+/**
+ * @brief Update the memory allocation counters for the current process
+ *
+ * OS specific call to updates the current memory allocation counters for the current process with
+ * the supplied delta.
+ *
+ * @param[in] kctx  The kbase context
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages);
+
+/**
+ * @brief Add to the memory allocation counters for the current process
+ *
+ * OS specific call to add to the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, pages);
+}
+
+/**
+ * @brief Subtract from the memory allocation counters for the current process
+ *
+ * OS specific call to subtract from the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx  The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages)
+{
+	kbasep_os_process_page_usage_update(kctx, 0 - pages);
+}
+
+/**
+ * kbasep_find_enclosing_cpu_mapping_offset() - Find the offset of the CPU
+ * mapping of a memory allocation containing a given address range
+ *
+ * Searches for a CPU mapping of any part of any region that fully encloses the
+ * CPU virtual address range specified by @uaddr and @size. Returns a failure
+ * indication if only part of the address range lies within a CPU mapping.
+ *
+ * @kctx:      The kernel base context used for the allocation.
+ * @uaddr:     Start of the CPU virtual address range.
+ * @size:      Size of the CPU virtual address range (in bytes).
+ * @offset:    The offset from the start of the allocation to the specified CPU
+ *             virtual address.
+ *
+ * Return: 0 if offset was obtained successfully. Error code otherwise.
+ */
+int kbasep_find_enclosing_cpu_mapping_offset(
+		struct kbase_context *kctx,
+		unsigned long uaddr, size_t size, u64 *offset);
+
+/**
+ * kbasep_find_enclosing_gpu_mapping_start_and_offset() - Find the address of
+ * the start of GPU virtual memory region which encloses @gpu_addr for the
+ * @size length in bytes
+ *
+ * Searches for the memory region in GPU virtual memory space which contains
+ * the region defined by the @gpu_addr and @size, where @gpu_addr is the
+ * beginning and @size the length in bytes of the provided region. If found,
+ * the location of the start address of the GPU virtual memory region is
+ * passed in @start pointer and the location of the offset of the region into
+ * the GPU virtual memory region is passed in @offset pointer.
+ *
+ * @kctx:	The kernel base context within which the memory is searched.
+ * @gpu_addr:	GPU virtual address for which the region is sought; defines
+ *              the beginning of the provided region.
+ * @size:       The length (in bytes) of the provided region for which the
+ *              GPU virtual memory region is sought.
+ * @start:      Pointer to the location where the address of the start of
+ *              the found GPU virtual memory region is.
+ * @offset:     Pointer to the location where the offset of @gpu_addr into
+ *              the found GPU virtual memory region is.
+ */
+int kbasep_find_enclosing_gpu_mapping_start_and_offset(
+		struct kbase_context *kctx,
+		u64 gpu_addr, size_t size, u64 *start, u64 *offset);
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer);
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * kbase_alloc_phy_pages_helper - Allocates physical pages.
+ * @alloc:              allocation object to add pages to
+ * @nr_pages_requested: number of physical pages to allocate
+ *
+ * Allocates \a nr_pages_requested and updates the alloc object.
+ *
+ * Return: 0 if all pages have been successfully allocated. Error code otherwise
+ *
+ * Note : The caller must not hold vm_lock, as this could cause a deadlock if
+ * the kernel OoM killer runs. If the caller must allocate pages while holding
+ * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
+ *
+ * This function cannot be used from interrupt context
+ */
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
+		size_t nr_pages_requested);
+
+/**
+ * kbase_alloc_phy_pages_helper_locked - Allocates physical pages.
+ * @alloc:              allocation object to add pages to
+ * @pool:               Memory pool to allocate from
+ * @nr_pages_requested: number of physical pages to allocate
+ * @prealloc_sa:        Information about the partial allocation if the amount
+ *                      of memory requested is not a multiple of 2MB. One
+ *                      instance of struct kbase_sub_alloc must be allocated by
+ *                      the caller iff CONFIG_MALI_2MB_ALLOC is enabled.
+ *
+ * Allocates \a nr_pages_requested and updates the alloc object. This function
+ * does not allocate new pages from the kernel, and therefore will never trigger
+ * the OoM killer. Therefore, it can be run while the vm_lock is held.
+ *
+ * As new pages can not be allocated, the caller must ensure there are
+ * sufficient pages in the pool. Usage of this function should look like :
+ *
+ *   kbase_gpu_vm_lock(kctx);
+ *   kbase_mem_pool_lock(pool)
+ *   while (kbase_mem_pool_size(pool) < pages_required) {
+ *     kbase_mem_pool_unlock(pool)
+ *     kbase_gpu_vm_unlock(kctx);
+ *     kbase_mem_pool_grow(pool)
+ *     kbase_gpu_vm_lock(kctx);
+ *     kbase_mem_pool_lock(pool)
+ *   }
+ *   kbase_alloc_phy_pages_helper_locked(pool)
+ *   kbase_mem_pool_unlock(pool)
+ *   Perform other processing that requires vm_lock...
+ *   kbase_gpu_vm_unlock(kctx);
+ *
+ * This ensures that the pool can be grown to the required size and that the
+ * allocation can complete without another thread using the newly grown pages.
+ *
+ * If CONFIG_MALI_2MB_ALLOC is defined and the allocation is >= 2MB, then
+ * @pool must be alloc->imported.native.kctx->lp_mem_pool. Otherwise it must be
+ * alloc->imported.native.kctx->mem_pool.
+ * @prealloc_sa is used to manage the non-2MB sub-allocation. It has to be
+ * pre-allocated because we must not sleep (due to the usage of kmalloc())
+ * whilst holding pool->pool_lock.
+ * @prealloc_sa shall be set to NULL if it has been consumed by this function
+ * to indicate that the caller must not free it.
+ *
+ * Return: Pointer to array of allocated pages. NULL on failure.
+ *
+ * Note : Caller must hold pool->pool_lock
+ */
+struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
+		struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool,
+		size_t nr_pages_requested,
+		struct kbase_sub_alloc **prealloc_sa);
+
+/**
+* @brief Free physical pages.
+*
+* Frees \a nr_pages and updates the alloc object.
+*
+* @param[in] alloc allocation object to free pages from
+* @param[in] nr_pages_to_free number of physical pages to free
+*/
+int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free);
+
+/**
+ * kbase_free_phy_pages_helper_locked - Free pages allocated with
+ *                                      kbase_alloc_phy_pages_helper_locked()
+ * @alloc:            Allocation object to free pages from
+ * @pool:             Memory pool to return freed pages to
+ * @pages:            Pages allocated by kbase_alloc_phy_pages_helper_locked()
+ * @nr_pages_to_free: Number of physical pages to free
+ *
+ * This function atomically frees pages allocated with
+ * kbase_alloc_phy_pages_helper_locked(). @pages is the pointer to the page
+ * array that is returned by that function. @pool must be the pool that the
+ * pages were originally allocated from.
+ *
+ * If the mem_pool has been unlocked since the allocation then
+ * kbase_free_phy_pages_helper() should be used instead.
+ */
+void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
+		struct kbase_mem_pool *pool, struct tagged_addr *pages,
+		size_t nr_pages_to_free);
+
+static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
+{
+	SetPagePrivate(p);
+	if (sizeof(dma_addr_t) > sizeof(p->private)) {
+		/* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the
+		 * private field stays the same. So we have to be clever and
+		 * use the fact that we only store DMA addresses of whole pages,
+		 * so the low bits should be zero */
+		KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1)));
+		set_page_private(p, dma_addr >> PAGE_SHIFT);
+	} else {
+		set_page_private(p, dma_addr);
+	}
+}
+
+static inline dma_addr_t kbase_dma_addr(struct page *p)
+{
+	if (sizeof(dma_addr_t) > sizeof(p->private))
+		return ((dma_addr_t)page_private(p)) << PAGE_SHIFT;
+
+	return (dma_addr_t)page_private(p);
+}
+
+static inline void kbase_clear_dma_addr(struct page *p)
+{
+	ClearPagePrivate(p);
+}
+
+/**
+* @brief Process a bus or page fault.
+*
+* This function will process a fault on a specific address space
+*
+* @param[in] kbdev   The @ref kbase_device the fault happened on
+* @param[in] kctx    The @ref kbase_context for the faulting address space if
+*                    one was found.
+* @param[in] as      The address space that has the fault
+*/
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_as *as);
+
+/**
+ * @brief Process a page fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void page_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Process a bus fault.
+ *
+ * @param[in] data  work_struct passed by queue_work()
+ */
+void bus_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Flush MMU workqueues.
+ *
+ * This function will cause any outstanding page or bus faults to be processed.
+ * It should be called prior to powering off the GPU.
+ *
+ * @param[in] kbdev   Device pointer
+ */
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev);
+
+/**
+ * kbase_sync_single_for_device - update physical memory and give GPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+/**
+ * kbase_sync_single_for_cpu - update physical memory and give CPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir:  DMA data direction
+ */
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir);
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * kbase_jit_debugfs_init - Add per context debugfs entry for JIT.
+ * @kctx: kbase context
+ */
+void kbase_jit_debugfs_init(struct kbase_context *kctx);
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_init - Initialize the JIT memory pool management
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_jit_init(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_allocate - Allocate JIT memory
+ * @kctx: kbase context
+ * @info: JIT allocation information
+ *
+ * Return: JIT allocation on success or NULL on failure.
+ */
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info);
+
+/**
+ * kbase_jit_free - Free a JIT allocation
+ * @kctx: kbase context
+ * @reg: JIT allocation
+ *
+ * Frees a JIT allocation and places it into the free pool for later reuse.
+ */
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing
+ * @reg: JIT allocation
+ */
+void kbase_jit_backing_lost(struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_evict - Evict a JIT allocation from the pool
+ * @kctx: kbase context
+ *
+ * Evict the least recently used JIT allocation from the pool. This can be
+ * required if normal VA allocations are failing due to VA exhaustion.
+ *
+ * Return: True if a JIT allocation was freed, false otherwise.
+ */
+bool kbase_jit_evict(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_term - Terminate the JIT memory pool management
+ * @kctx: kbase context
+ */
+void kbase_jit_term(struct kbase_context *kctx);
+
+/**
+ * kbase_map_external_resource - Map an external resource to the GPU.
+ * @kctx:              kbase context.
+ * @reg:               The region to map.
+ * @locked_mm:         The mm_struct which has been locked for this operation.
+ *
+ * Return: The physical allocation which backs the region on success or NULL
+ * on failure.
+ */
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm);
+
+/**
+ * kbase_unmap_external_resource - Unmap an external resource from the GPU.
+ * @kctx:  kbase context.
+ * @reg:   The region to unmap or NULL if it has already been released.
+ * @alloc: The physical allocation being unmapped.
+ */
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_sticky_resource_init - Initialize sticky resource management.
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_sticky_resource_init(struct kbase_context *kctx);
+
+/**
+ * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @gpu_addr: The GPU address of the external resource.
+ *
+ * Return: The metadata object which represents the binding between the
+ * external resource and the kbase context on success or NULL on failure.
+ */
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_release - Release a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @meta:     Binding metadata.
+ * @gpu_addr: GPU address of the external resource.
+ *
+ * If meta is NULL then gpu_addr will be used to scan the metadata list and
+ * find the matching metadata (if any), otherwise the provided meta will be
+ * used and gpu_addr will be ignored.
+ *
+ * Return: True if the release found the metadata and the reference was dropped.
+ */
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_term - Terminate sticky resource management.
+ * @kctx: kbase context
+ */
+void kbase_sticky_resource_term(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_pool_lock - Lock a memory pool
+ * @pool: Memory pool to lock
+ */
+static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
+{
+	spin_lock(&pool->pool_lock);
+}
+
+/**
+ * kbase_mem_pool_lock - Release a memory pool
+ * @pool: Memory pool to lock
+ */
+static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool)
+{
+	spin_unlock(&pool->pool_lock);
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc);
+
+
+#endif				/* _KBASE_MEM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
new file mode 100644
index 0000000000000..4b4214f26f5d8
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -0,0 +1,2568 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_linux.c
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/dma-mapping.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
+	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+#include <linux/dma-attrs.h>
+#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#include <linux/shrinker.h>
+#include <linux/cache.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_ioctl.h>
+
+
+static int kbase_vmap_phy_pages(struct kbase_context *kctx,
+		struct kbase_va_region *reg, u64 offset_bytes, size_t size,
+		struct kbase_vmap_struct *map);
+static void kbase_vunmap_phy_pages(struct kbase_context *kctx,
+		struct kbase_vmap_struct *map);
+
+/*
+ * From 4.20.0 kernel vm_insert_pfn was dropped
+ * Make wrapper to preserve compatibility
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0)
+int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+		  unsigned long pfn)
+{
+	return vm_fault_to_errno(vmf_insert_pfn(vma, addr, pfn), 0xffff);
+}
+#endif
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
+
+/* Retrieve the associated region pointer if the GPU address corresponds to
+ * one of the event memory pages. The enclosing region, if found, shouldn't
+ * have been marked as free.
+ */
+static struct kbase_va_region *kbase_find_event_mem_region(
+			struct kbase_context *kctx, u64 gpu_addr)
+{
+
+	return NULL;
+}
+
+/**
+ * kbase_phy_alloc_mapping_init - Initialize the kernel side permanent mapping
+ *                                of the physical allocation belonging to a
+ *                                region
+ * @kctx:  The kernel base context @reg belongs to.
+ * @reg:   The region whose physical allocation is to be mapped
+ * @vsize: The size of the requested region, in pages
+ * @size:  The size in pages initially committed to the region
+ *
+ * Return: 0 on success, otherwise an error code indicating failure
+ *
+ * Maps the physical allocation backing a non-free @reg, so it may be
+ * accessed directly from the kernel. This is only supported for physical
+ * allocations of type KBASE_MEM_TYPE_NATIVE, and will fail for other types of
+ * physical allocation.
+ *
+ * The mapping is stored directly in the allocation that backs @reg. The
+ * refcount is not incremented at this point. Instead, use of the mapping should
+ * be surrounded by kbase_phy_alloc_mapping_get() and
+ * kbase_phy_alloc_mapping_put() to ensure it does not disappear whilst the
+ * client is accessing it.
+ *
+ * Both cached and uncached regions are allowed, but any sync operations are the
+ * responsibility of the client using the permanent mapping.
+ *
+ * A number of checks are made to ensure that a region that needs a permanent
+ * mapping can actually be supported:
+ * - The region must be created as fully backed
+ * - The region must not be growable
+ *
+ * This function will fail if those checks are not satisfied.
+ *
+ * On success, the region will also be forced into a certain kind:
+ * - It will no longer be growable
+ */
+static int kbase_phy_alloc_mapping_init(struct kbase_context *kctx,
+		struct kbase_va_region *reg, size_t vsize, size_t size)
+{
+	size_t size_bytes = (size << PAGE_SHIFT);
+	struct kbase_vmap_struct *kern_mapping;
+	int err = 0;
+
+	/* Can only map in regions that are always fully committed
+	 * Don't setup the mapping twice
+	 * Only support KBASE_MEM_TYPE_NATIVE allocations
+	 */
+	if (vsize != size || reg->cpu_alloc->permanent_map != NULL ||
+			reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+		return -EINVAL;
+
+	if (size > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES -
+			kctx->permanent_mapped_pages)) {
+		dev_warn(kctx->kbdev->dev, "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %lu pages",
+				(u64)size,
+				KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES,
+				kctx->permanent_mapped_pages);
+		return -ENOMEM;
+	}
+
+	kern_mapping = kzalloc(sizeof(*kern_mapping), GFP_KERNEL);
+	if (!kern_mapping)
+		return -ENOMEM;
+
+	err = kbase_vmap_phy_pages(kctx, reg, 0u, size_bytes, kern_mapping);
+	if (err < 0)
+		goto vmap_fail;
+
+	/* No support for growing or shrinking mapped regions */
+	reg->flags &= ~KBASE_REG_GROWABLE;
+
+	reg->cpu_alloc->permanent_map = kern_mapping;
+	kctx->permanent_mapped_pages += size;
+
+	return 0;
+vmap_fail:
+	kfree(kern_mapping);
+	return err;
+}
+
+void kbase_phy_alloc_mapping_term(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc)
+{
+	WARN_ON(!alloc->permanent_map);
+	kbase_vunmap_phy_pages(kctx, alloc->permanent_map);
+	kfree(alloc->permanent_map);
+
+	alloc->permanent_map = NULL;
+
+	/* Mappings are only done on cpu_alloc, so don't need to worry about
+	 * this being reduced a second time if a separate gpu_alloc is
+	 * freed
+	 */
+	WARN_ON(alloc->nents > kctx->permanent_mapped_pages);
+	kctx->permanent_mapped_pages -= alloc->nents;
+}
+
+void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx,
+		u64 gpu_addr,
+		struct kbase_vmap_struct **out_kern_mapping)
+{
+	struct kbase_va_region *reg;
+	void *kern_mem_ptr = NULL;
+	struct kbase_vmap_struct *kern_mapping;
+	u64 mapping_offset;
+
+	WARN_ON(!kctx);
+	WARN_ON(!out_kern_mapping);
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* First do a quick lookup in the list of event memory regions */
+	reg = kbase_find_event_mem_region(kctx, gpu_addr);
+
+	if (!reg) {
+		reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx, gpu_addr);
+	}
+
+	if (reg == NULL || (reg->flags & KBASE_REG_FREE) != 0)
+		goto out_unlock;
+
+	kern_mapping = reg->cpu_alloc->permanent_map;
+	if (kern_mapping == NULL)
+		goto out_unlock;
+
+	mapping_offset = gpu_addr - (reg->start_pfn << PAGE_SHIFT);
+
+	/* Refcount the allocations to prevent them disappearing */
+	WARN_ON(reg->cpu_alloc != kern_mapping->cpu_alloc);
+	WARN_ON(reg->gpu_alloc != kern_mapping->gpu_alloc);
+	(void)kbase_mem_phy_alloc_get(kern_mapping->cpu_alloc);
+	(void)kbase_mem_phy_alloc_get(kern_mapping->gpu_alloc);
+
+	kern_mem_ptr = (void *)(uintptr_t)((uintptr_t)kern_mapping->addr + mapping_offset);
+	*out_kern_mapping = kern_mapping;
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return kern_mem_ptr;
+}
+
+void kbase_phy_alloc_mapping_put(struct kbase_context *kctx,
+		struct kbase_vmap_struct *kern_mapping)
+{
+	WARN_ON(!kctx);
+	WARN_ON(!kern_mapping);
+
+	WARN_ON(kctx != kern_mapping->cpu_alloc->imported.native.kctx);
+	WARN_ON(kern_mapping != kern_mapping->cpu_alloc->permanent_map);
+
+	kbase_mem_phy_alloc_put(kern_mapping->cpu_alloc);
+	kbase_mem_phy_alloc_put(kern_mapping->gpu_alloc);
+
+	/* kern_mapping and the gpu/cpu phy allocs backing it must not be used
+	 * from now on
+	 */
+}
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
+		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
+		u64 *gpu_va)
+{
+	int zone;
+	struct kbase_va_region *reg;
+	struct rb_root *rbtree;
+	struct device *dev;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(gpu_va);
+
+	dev = kctx->kbdev->dev;
+	*gpu_va = 0; /* return 0 on failure */
+
+	if (!kbase_check_alloc_flags(*flags)) {
+		dev_warn(dev,
+				"kbase_mem_alloc called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 &&
+			(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) {
+		/* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable");
+		goto bad_flags;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
+	}
+
+	if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages, extent))
+		goto bad_sizes;
+
+	/* find out which VA zone to use */
+	if (*flags & BASE_MEM_SAME_VA) {
+		rbtree = &kctx->reg_rbtree_same;
+		zone = KBASE_REG_ZONE_SAME_VA;
+	} else {
+		rbtree = &kctx->reg_rbtree_custom;
+		zone = KBASE_REG_ZONE_CUSTOM_VA;
+	}
+
+	reg = kbase_alloc_free_region(rbtree, 0, va_pages, zone);
+	if (!reg) {
+		dev_err(dev, "Failed to allocate free region");
+		goto no_region;
+	}
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	if (kbase_reg_prepare_native(reg, kctx) != 0) {
+		dev_err(dev, "Failed to prepare region");
+		goto prepare_failed;
+	}
+
+	if (*flags & (BASE_MEM_GROW_ON_GPF|BASE_MEM_TILER_ALIGN_TOP)) {
+		/* kbase_check_alloc_sizes() already checks extent is valid for
+		 * assigning to reg->extent */
+		reg->extent = extent;
+	} else {
+		reg->extent = 0;
+	}
+
+	if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) {
+		dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)",
+				(unsigned long long)commit_pages,
+				(unsigned long long)va_pages);
+		goto no_mem;
+	}
+	reg->initial_commit = commit_pages;
+
+	kbase_gpu_vm_lock(kctx);
+
+	if (reg->flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) {
+		/* Permanent kernel mappings must happen as soon as
+		 * reg->cpu_alloc->pages is ready. Currently this happens after
+		 * kbase_alloc_phy_pages(). If we move that to setup pages
+		 * earlier, also move this call too
+		 */
+		int err = kbase_phy_alloc_mapping_init(kctx, reg, va_pages,
+				commit_pages);
+		if (err < 0) {
+			kbase_gpu_vm_unlock(kctx);
+			goto no_kern_mapping;
+		}
+	}
+
+
+	/* mmap needed to setup VA? */
+	if (*flags & BASE_MEM_SAME_VA) {
+		unsigned long prot = PROT_NONE;
+		unsigned long va_size = va_pages << PAGE_SHIFT;
+		unsigned long va_map = va_size;
+		unsigned long cookie, cookie_nr;
+		unsigned long cpu_addr;
+
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(dev, "No cookies available for allocation!");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_cookie;
+		}
+		/* return a cookie */
+		cookie_nr = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << cookie_nr);
+		BUG_ON(kctx->pending_regions[cookie_nr]);
+		kctx->pending_regions[cookie_nr] = reg;
+
+		kbase_gpu_vm_unlock(kctx);
+
+		/* relocate to correct base */
+		cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		cookie <<= PAGE_SHIFT;
+
+		/*
+		 * 10.1-10.4 UKU userland relies on the kernel to call mmap.
+		 * For all other versions we can just return the cookie
+		 */
+		if (kctx->api_version < KBASE_API_VERSION(10, 1) ||
+		    kctx->api_version > KBASE_API_VERSION(10, 4)) {
+			*gpu_va = (u64) cookie;
+			return reg;
+		}
+		if (*flags & BASE_MEM_PROT_CPU_RD)
+			prot |= PROT_READ;
+		if (*flags & BASE_MEM_PROT_CPU_WR)
+			prot |= PROT_WRITE;
+
+		cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot,
+				MAP_SHARED, cookie);
+
+		if (IS_ERR_VALUE(cpu_addr)) {
+			kbase_gpu_vm_lock(kctx);
+			kctx->pending_regions[cookie_nr] = NULL;
+			kctx->cookies |= (1UL << cookie_nr);
+			kbase_gpu_vm_unlock(kctx);
+			goto no_mmap;
+		}
+
+		*gpu_va = (u64) cpu_addr;
+	} else /* we control the VA */ {
+		if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) {
+			dev_warn(dev, "Failed to map memory on GPU");
+			kbase_gpu_vm_unlock(kctx);
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+
+		kbase_gpu_vm_unlock(kctx);
+	}
+
+	return reg;
+
+no_mmap:
+no_cookie:
+no_kern_mapping:
+no_mem:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+invalid_flags:
+prepare_failed:
+	kfree(reg);
+no_region:
+bad_sizes:
+bad_flags:
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mem_alloc);
+
+int kbase_mem_query(struct kbase_context *kctx,
+		u64 gpu_addr, u64 query, u64 * const out)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(out);
+
+	if (gpu_addr & ~PAGE_MASK) {
+		dev_warn(kctx->kbdev->dev, "mem_query: gpu_addr: passed parameter is invalid");
+		return -EINVAL;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	switch (query) {
+	case KBASE_MEM_QUERY_COMMIT_SIZE:
+		if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) {
+			*out = kbase_reg_current_backed_size(reg);
+		} else {
+			size_t i;
+			struct kbase_aliased *aliased;
+			*out = 0;
+			aliased = reg->cpu_alloc->imported.alias.aliased;
+			for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++)
+				*out += aliased[i].length;
+		}
+		break;
+	case KBASE_MEM_QUERY_VA_SIZE:
+		*out = reg->nr_pages;
+		break;
+	case KBASE_MEM_QUERY_FLAGS:
+	{
+		*out = 0;
+		if (KBASE_REG_CPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_WR;
+		if (KBASE_REG_CPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_CPU_RD;
+		if (KBASE_REG_CPU_CACHED & reg->flags)
+			*out |= BASE_MEM_CACHED_CPU;
+		if (KBASE_REG_GPU_WR & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_WR;
+		if (KBASE_REG_GPU_RD & reg->flags)
+			*out |= BASE_MEM_PROT_GPU_RD;
+		if (!(KBASE_REG_GPU_NX & reg->flags))
+			*out |= BASE_MEM_PROT_GPU_EX;
+		if (KBASE_REG_SHARE_BOTH & reg->flags)
+			*out |= BASE_MEM_COHERENT_SYSTEM;
+		if (KBASE_REG_SHARE_IN & reg->flags)
+			*out |= BASE_MEM_COHERENT_LOCAL;
+		if (kctx->api_version >= KBASE_API_VERSION(11, 2)) {
+			/* Prior to 11.2, these were known about by user-side
+			 * but we did not return them. Returning some of these
+			 * caused certain clients that were not expecting them
+			 * to fail, so we omit all of them as a special-case
+			 * for compatibility reasons */
+			if (KBASE_REG_PF_GROW & reg->flags)
+				*out |= BASE_MEM_GROW_ON_GPF;
+			if (KBASE_REG_SECURE & reg->flags)
+				*out |= BASE_MEM_SECURE;
+		}
+		if (KBASE_REG_TILER_ALIGN_TOP & reg->flags)
+			*out |= BASE_MEM_TILER_ALIGN_TOP;
+		if (!(KBASE_REG_GPU_CACHED & reg->flags))
+			*out |= BASE_MEM_UNCACHED_GPU;
+		if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags)
+			*out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE;
+
+		WARN(*out & ~BASE_MEM_FLAGS_QUERYABLE,
+				"BASE_MEM_FLAGS_QUERYABLE needs updating\n");
+		*out &= BASE_MEM_FLAGS_QUERYABLE;
+		break;
+	}
+	default:
+		*out = 0;
+		goto out_unlock;
+	}
+
+	ret = 0;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the
+ * Ephemeral memory eviction list.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages which can be freed.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long pages = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+
+	mutex_lock(&kctx->jit_evict_lock);
+
+	list_for_each_entry(alloc, &kctx->evict_list, evict_node)
+		pages += alloc->nents;
+
+	mutex_unlock(&kctx->jit_evict_lock);
+	return pages;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction
+ * list for pages and try to reclaim them.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages freed (can be less then requested) or -1 if the
+ * shrinker failed to free pages in its pool.
+ *
+ * Note:
+ * This function accesses region structures without taking the region lock,
+ * this is required as the OOM killer can call the shrinker after the region
+ * lock has already been held.
+ * This is safe as we can guarantee that a region on the eviction list will
+ * not be freed (kbase_mem_free_region removes the allocation from the list
+ * before destroying it), or modified by other parts of the driver.
+ * The eviction list itself is guarded by the eviction lock and the MMU updates
+ * are protected by their own lock.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	struct kbase_mem_phy_alloc *tmp;
+	unsigned long freed = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+	mutex_lock(&kctx->jit_evict_lock);
+
+	list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
+		int err;
+
+		err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
+				0, alloc->nents);
+		if (err != 0) {
+			/*
+			 * Failed to remove GPU mapping, tell the shrinker
+			 * to stop trying to shrink our slab even though we
+			 * have pages in it.
+			 */
+			freed = -1;
+			goto out_unlock;
+		}
+
+		/*
+		 * Update alloc->evicted before freeing the backing so the
+		 * helper can determine that it needs to bypass the accounting
+		 * and memory pool.
+		 */
+		alloc->evicted = alloc->nents;
+
+		kbase_free_phy_pages_helper(alloc, alloc->evicted);
+		freed += alloc->evicted;
+		list_del_init(&alloc->evict_node);
+
+		/*
+		 * Inform the JIT allocator this region has lost backing
+		 * as it might need to free the allocation.
+		 */
+		kbase_jit_backing_lost(alloc->reg);
+
+		/* Enough pages have been freed so stop now */
+		if (freed > sc->nr_to_scan)
+			break;
+	}
+out_unlock:
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_evictable_reclaim_count_objects(s, sc);
+
+	return kbase_mem_evictable_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_evictable_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->evict_list);
+	mutex_init(&kctx->jit_evict_lock);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink;
+#else
+	kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects;
+	kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects;
+#endif
+	kctx->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	kctx->reclaim.batch = 0;
+#endif
+	register_shrinker(&kctx->reclaim);
+	return 0;
+}
+
+void kbase_mem_evictable_deinit(struct kbase_context *kctx)
+{
+	unregister_shrinker(&kctx->reclaim);
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.native.kctx;
+	int __maybe_unused new_page_count;
+
+	kbase_process_page_usage_dec(kctx, alloc->nents);
+	new_page_count = kbase_atomic_sub_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+}
+
+/**
+ * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable.
+ * @alloc: The physical allocation
+ */
+static
+void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.native.kctx;
+	int __maybe_unused new_page_count;
+
+	new_page_count = kbase_atomic_add_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters so that the allocation is accounted for
+	 * against the process and thus is visible to the OOM killer,
+	 */
+	kbase_process_page_usage_inc(kctx, alloc->nents);
+
+	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kctx->id,
+			(u64)new_page_count);
+}
+
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.native.kctx;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg,
+			0, gpu_alloc->nents);
+
+	mutex_lock(&kctx->jit_evict_lock);
+	/* This allocation can't already be on a list. */
+	WARN_ON(!list_empty(&gpu_alloc->evict_node));
+
+	/*
+	 * Add the allocation to the eviction list, after this point the shrink
+	 * can reclaim it.
+	 */
+	list_add(&gpu_alloc->evict_node, &kctx->evict_list);
+	mutex_unlock(&kctx->jit_evict_lock);
+	kbase_mem_evictable_mark_reclaim(gpu_alloc);
+
+	gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED;
+	return 0;
+}
+
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.native.kctx;
+	int err = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	mutex_lock(&kctx->jit_evict_lock);
+	/*
+	 * First remove the allocation from the eviction list as it's no
+	 * longer eligible for eviction.
+	 */
+	list_del_init(&gpu_alloc->evict_node);
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	if (gpu_alloc->evicted == 0) {
+		/*
+		 * The backing is still present, update the VM stats as it's
+		 * in use again.
+		 */
+		kbase_mem_evictable_unmark_reclaim(gpu_alloc);
+	} else {
+		/* If the region is still alive ... */
+		if (gpu_alloc->reg) {
+			/* ... allocate replacement backing ... */
+			err = kbase_alloc_phy_pages_helper(gpu_alloc,
+					gpu_alloc->evicted);
+
+			/*
+			 * ... and grow the mapping back to its
+			 * pre-eviction size.
+			 */
+			if (!err)
+				err = kbase_mem_grow_gpu_mapping(kctx,
+						gpu_alloc->reg,
+						gpu_alloc->evicted, 0);
+
+			gpu_alloc->evicted = 0;
+		}
+	}
+
+	/* If the region is still alive remove the DONT_NEED attribute. */
+	if (gpu_alloc->reg)
+		gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED;
+
+	return (err == 0);
+}
+
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask)
+{
+	struct kbase_va_region *reg;
+	int ret = -EINVAL;
+	unsigned int real_flags = 0;
+	unsigned int prev_flags = 0;
+	bool prev_needed, new_needed;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (!gpu_addr)
+		return -EINVAL;
+
+	if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE))
+		return -EINVAL;
+
+	/* nuke other bits */
+	flags &= mask;
+
+	/* check for only supported flags */
+	if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* mask covers bits we don't support? */
+	if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE))
+		goto out;
+
+	/* convert flags */
+	if (BASE_MEM_COHERENT_SYSTEM & flags)
+		real_flags |= KBASE_REG_SHARE_BOTH;
+	else if (BASE_MEM_COHERENT_LOCAL & flags)
+		real_flags |= KBASE_REG_SHARE_IN;
+
+	/* now we can lock down the context, and find the region */
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	/* Is the region being transitioning between not needed and needed? */
+	prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED;
+	new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED;
+	if (prev_needed != new_needed) {
+		/* Aliased allocations can't be made ephemeral */
+		if (atomic_read(&reg->cpu_alloc->gpu_mappings) > 1)
+			goto out_unlock;
+
+		if (new_needed) {
+			/* Only native allocations can be marked not needed */
+			if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			ret = kbase_mem_evictable_make(reg->gpu_alloc);
+			if (ret)
+				goto out_unlock;
+		} else {
+			kbase_mem_evictable_unmake(reg->gpu_alloc);
+		}
+	}
+
+	/* limit to imported memory */
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)
+		goto out_unlock;
+
+	/* shareability flags are ignored for GPU uncached memory */
+	if (!(reg->flags & KBASE_REG_GPU_CACHED)) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	/* no change? */
+	if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	/* save for roll back */
+	prev_flags = reg->flags;
+	reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+	reg->flags |= real_flags;
+
+	/* Currently supporting only imported memory */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+		/* Future use will use the new flags, existing mapping will NOT be updated
+		 * as memory should not be in use by the GPU when updating the flags.
+		 */
+		ret = 0;
+		WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count);
+	}
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+	/* roll back on error */
+	if (ret)
+		reg->flags = prev_flags;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	up_write(&current->mm->mmap_sem);
+out:
+	return ret;
+}
+
+#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS)
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
+		int fd, u64 *va_pages, u64 *flags, u32 padding)
+{
+	struct kbase_va_region *reg;
+	struct dma_buf *dma_buf;
+	struct dma_buf_attachment *dma_attachment;
+	bool shared_zone = false;
+
+	dma_buf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(dma_buf))
+		goto no_buf;
+
+	dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev);
+	if (!dma_attachment)
+		goto no_attachment;
+
+	*va_pages = (PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT) + padding;
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* ignore SAME_VA */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(&kctx->reg_rbtree_same,
+				0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+	} else {
+		reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
+				0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages,
+			KBASE_MEM_TYPE_IMPORTED_UMM);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	/* No pages to map yet */
+	reg->gpu_alloc->nents = 0;
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX;	/* UMM is always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMM cannot be grown */
+
+	if (*flags & BASE_MEM_SECURE)
+		reg->flags |= KBASE_REG_SECURE;
+
+	if (padding)
+		reg->flags |= KBASE_REG_IMPORT_PAD;
+
+	reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM;
+	reg->gpu_alloc->imported.umm.sgt = NULL;
+	reg->gpu_alloc->imported.umm.dma_buf = dma_buf;
+	reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment;
+	reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0;
+	reg->extent = 0;
+
+	return reg;
+
+invalid_flags:
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	dma_buf_detach(dma_buf, dma_attachment);
+no_attachment:
+	dma_buf_put(dma_buf);
+no_buf:
+	return NULL;
+}
+#endif  /* CONFIG_DMA_SHARED_BUFFER */
+
+static u32 kbase_get_cache_line_alignment(struct kbase_context *kctx)
+{
+	u32 cpu_cache_line_size = cache_line_size();
+	u32 gpu_cache_line_size =
+		(1UL << kctx->kbdev->gpu_props.props.l2_props.log2_line_size);
+
+	return ((cpu_cache_line_size > gpu_cache_line_size) ?
+				cpu_cache_line_size :
+				gpu_cache_line_size);
+}
+
+static struct kbase_va_region *kbase_mem_from_user_buffer(
+		struct kbase_context *kctx, unsigned long address,
+		unsigned long size, u64 *va_pages, u64 *flags)
+{
+	long i;
+	struct kbase_va_region *reg;
+	struct rb_root *rbtree;
+	long faulted_pages;
+	int zone = KBASE_REG_ZONE_CUSTOM_VA;
+	bool shared_zone = false;
+	u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx);
+	struct kbase_alloc_import_user_buf *user_buf;
+	struct page **pages = NULL;
+
+	if ((address & (cache_line_alignment - 1)) != 0 ||
+			(size & (cache_line_alignment - 1)) != 0) {
+		if (*flags & BASE_MEM_UNCACHED_GPU) {
+			dev_warn(kctx->kbdev->dev,
+					"User buffer is not cache line aligned and marked as GPU uncached\n");
+			goto bad_size;
+		}
+
+		/* Coherency must be enabled to handle partial cache lines */
+		if (*flags & (BASE_MEM_COHERENT_SYSTEM |
+			BASE_MEM_COHERENT_SYSTEM_REQUIRED)) {
+			/* Force coherent system required flag, import will
+			 * then fail if coherency isn't available
+			 */
+			*flags |= BASE_MEM_COHERENT_SYSTEM_REQUIRED;
+		} else {
+			dev_warn(kctx->kbdev->dev,
+					"User buffer is not cache line aligned and no coherency enabled\n");
+			goto bad_size;
+		}
+	}
+
+	*va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) -
+		PFN_DOWN(address);
+	if (!*va_pages)
+		goto bad_size;
+
+	if (*va_pages > (UINT64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* SAME_VA generally not supported with imported memory (no known use cases) */
+	*flags &= ~BASE_MEM_SAME_VA;
+
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		zone = KBASE_REG_ZONE_SAME_VA;
+		rbtree = &kctx->reg_rbtree_same;
+	} else
+		rbtree = &kctx->reg_rbtree_custom;
+
+	reg = kbase_alloc_free_region(rbtree, 0, *va_pages, zone);
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages,
+			KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */
+
+	user_buf = &reg->gpu_alloc->imported.user_buf;
+
+	user_buf->size = size;
+	user_buf->address = address;
+	user_buf->nr_pages = *va_pages;
+	user_buf->mm = current->mm;
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+	atomic_inc(&current->mm->mm_count);
+#else
+	mmgrab(current->mm);
+#endif
+	user_buf->pages = kmalloc_array(*va_pages, sizeof(struct page *),
+			GFP_KERNEL);
+
+	if (!user_buf->pages)
+		goto no_page_array;
+
+	/* If the region is coherent with the CPU then the memory is imported
+	 * and mapped onto the GPU immediately.
+	 * Otherwise get_user_pages is called as a sanity check, but with
+	 * NULL as the pages argument which will fault the pages, but not
+	 * pin them. The memory will then be pinned only around the jobs that
+	 * specify the region as an external resource.
+	 */
+	if (reg->flags & KBASE_REG_SHARE_BOTH) {
+		pages = user_buf->pages;
+		*flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+	}
+
+	down_read(&current->mm->mmap_sem);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL);
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	faulted_pages = get_user_pages(address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL);
+#else
+	faulted_pages = get_user_pages(address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL);
+#endif
+
+	up_read(&current->mm->mmap_sem);
+
+	if (faulted_pages != *va_pages)
+		goto fault_mismatch;
+
+	reg->gpu_alloc->nents = 0;
+	reg->extent = 0;
+
+	if (pages) {
+		struct device *dev = kctx->kbdev->dev;
+		unsigned long local_size = user_buf->size;
+		unsigned long offset = user_buf->address & ~PAGE_MASK;
+		struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg);
+
+		/* Top bit signifies that this was pinned on import */
+		user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT;
+
+		for (i = 0; i < faulted_pages; i++) {
+			dma_addr_t dma_addr;
+			unsigned long min;
+
+			min = MIN(PAGE_SIZE - offset, local_size);
+			dma_addr = dma_map_page(dev, pages[i],
+					offset, min,
+					DMA_BIDIRECTIONAL);
+			if (dma_mapping_error(dev, dma_addr))
+				goto unwind_dma_map;
+
+			user_buf->dma_addrs[i] = dma_addr;
+			pa[i] = as_tagged(page_to_phys(pages[i]));
+
+			local_size -= min;
+			offset = 0;
+		}
+
+		reg->gpu_alloc->nents = faulted_pages;
+	}
+
+	return reg;
+
+unwind_dma_map:
+	while (i--) {
+		dma_unmap_page(kctx->kbdev->dev,
+				user_buf->dma_addrs[i],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
+fault_mismatch:
+	if (pages) {
+		for (i = 0; i < faulted_pages; i++)
+			put_page(pages[i]);
+	}
+no_page_array:
+invalid_flags:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	return NULL;
+
+}
+
+
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
+		    u64 nents, struct base_mem_aliasing_info *ai,
+		    u64 *num_pages)
+{
+	struct kbase_va_region *reg;
+	u64 gpu_va;
+	size_t i;
+	bool coherent;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(ai);
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	/* mask to only allowed flags */
+	*flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
+		   BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL |
+		   BASE_MEM_COHERENT_SYSTEM_REQUIRED);
+
+	if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_alias called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+	coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 ||
+			(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0;
+
+	if (!stride)
+		goto bad_stride;
+
+	if (!nents)
+		goto bad_nents;
+
+	if ((nents * stride) > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* calculate the number of pages this alias will cover */
+	*num_pages = nents * stride;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* 64-bit tasks must MMAP anyway, but not expose this address to
+		 * clients */
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0,
+				*num_pages,
+				KBASE_REG_ZONE_SAME_VA);
+	} else {
+#else
+	if (1) {
+#endif
+		reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
+				0, *num_pages,
+				KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	/* zero-sized page array, as we don't need one/can support one */
+	reg->gpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_ALIAS);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->gpu_alloc->imported.alias.nents = nents;
+	reg->gpu_alloc->imported.alias.stride = stride;
+	reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents);
+	if (!reg->gpu_alloc->imported.alias.aliased)
+		goto no_aliased_array;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* validate and add src handles */
+	for (i = 0; i < nents; i++) {
+		if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) {
+			if (ai[i].handle.basep.handle !=
+			    BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE)
+				goto bad_handle; /* unsupported magic handle */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+		} else {
+			struct kbase_va_region *aliasing_reg;
+			struct kbase_mem_phy_alloc *alloc;
+
+			aliasing_reg = kbase_region_tracker_find_region_base_address(
+				kctx,
+				(ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT);
+
+			/* validate found region */
+			if (!aliasing_reg)
+				goto bad_handle; /* Not found */
+			if (aliasing_reg->flags & KBASE_REG_FREE)
+				goto bad_handle; /* Free region */
+			if (aliasing_reg->flags & KBASE_REG_DONT_NEED)
+				goto bad_handle; /* Ephemeral region */
+			if (!(aliasing_reg->flags & KBASE_REG_GPU_CACHED))
+				goto bad_handle; /* GPU uncached memory */
+			if (!aliasing_reg->gpu_alloc)
+				goto bad_handle; /* No alloc */
+			if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+				goto bad_handle; /* Not a native alloc */
+			if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0))
+				goto bad_handle;
+				/* Non-coherent memory cannot alias
+				   coherent memory, and vice versa.*/
+
+			/* check size against stride */
+			if (!ai[i].length)
+				goto bad_handle; /* must be > 0 */
+			if (ai[i].length > stride)
+				goto bad_handle; /* can't be larger than the
+						    stride */
+
+			alloc = aliasing_reg->gpu_alloc;
+
+			/* check against the alloc's size */
+			if (ai[i].offset > alloc->nents)
+				goto bad_handle; /* beyond end */
+			if (ai[i].offset + ai[i].length > alloc->nents)
+				goto bad_handle; /* beyond end */
+
+			reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc);
+			reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+			reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset;
+		}
+	}
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies) {
+			dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
+			goto no_cookie;
+		}
+		/* return a cookie */
+		gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << gpu_va);
+		BUG_ON(kctx->pending_regions[gpu_va]);
+		kctx->pending_regions[gpu_va] = reg;
+
+		/* relocate to correct base */
+		gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		gpu_va <<= PAGE_SHIFT;
+	} else /* we control the VA */ {
+#else
+	if (1) {
+#endif
+		if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) {
+			dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
+			goto no_mmap;
+		}
+		/* return real GPU VA */
+		gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags &= ~KBASE_REG_GROWABLE;
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return gpu_va;
+
+#ifdef CONFIG_64BIT
+no_cookie:
+#endif
+no_mmap:
+bad_handle:
+	kbase_gpu_vm_unlock(kctx);
+no_aliased_array:
+invalid_flags:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_reg:
+bad_size:
+bad_nents:
+bad_stride:
+bad_flags:
+	return 0;
+}
+
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages,
+		u64 *flags)
+{
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(gpu_va);
+	KBASE_DEBUG_ASSERT(va_pages);
+	KBASE_DEBUG_ASSERT(flags);
+
+	if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) &&
+			kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA))
+		*flags |= BASE_MEM_SAME_VA;
+
+	if (!kbase_check_import_flags(*flags)) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_import called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_import call required coherent mem when unavailable");
+		goto bad_flags;
+	}
+	if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
+			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
+		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
+	}
+
+	if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) {
+		dev_warn(kctx->kbdev->dev,
+				"padding is only supported for UMM");
+		goto bad_flags;
+	}
+
+	switch (type) {
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case BASE_MEM_IMPORT_TYPE_UMM: {
+		int fd;
+
+		if (get_user(fd, (int __user *)phandle))
+			reg = NULL;
+		else
+			reg = kbase_mem_from_umm(kctx, fd, va_pages, flags,
+					padding);
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case BASE_MEM_IMPORT_TYPE_USER_BUFFER: {
+		struct base_mem_import_user_buffer user_buffer;
+		void __user *uptr;
+
+		if (copy_from_user(&user_buffer, phandle,
+				sizeof(user_buffer))) {
+			reg = NULL;
+		} else {
+#ifdef CONFIG_COMPAT
+			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+				uptr = compat_ptr(user_buffer.ptr);
+			else
+#endif
+				uptr = u64_to_user_ptr(user_buffer.ptr);
+
+			reg = kbase_mem_from_user_buffer(kctx,
+					(unsigned long)uptr, user_buffer.length,
+					va_pages, flags);
+		}
+		break;
+	}
+	default: {
+		reg = NULL;
+		break;
+	}
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* mmap needed to setup VA? */
+	if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) {
+		/* Bind to a cookie */
+		if (!kctx->cookies)
+			goto no_cookie;
+		/* return a cookie */
+		*gpu_va = __ffs(kctx->cookies);
+		kctx->cookies &= ~(1UL << *gpu_va);
+		BUG_ON(kctx->pending_regions[*gpu_va]);
+		kctx->pending_regions[*gpu_va] = reg;
+
+		/* relocate to correct base */
+		*gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		*gpu_va <<= PAGE_SHIFT;
+
+	} else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES)  {
+		/* we control the VA, mmap now to the GPU */
+		if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	} else {
+		/* we control the VA, but nothing to mmap yet */
+		if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	/* clear out private flags */
+	*flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1);
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return 0;
+
+no_gpu_va:
+no_cookie:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+no_reg:
+bad_flags:
+	*gpu_va = 0;
+	*va_pages = 0;
+	*flags = 0;
+	return -ENOMEM;
+}
+
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	struct tagged_addr *phy_pages;
+	u64 delta = new_pages - old_pages;
+	int ret = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Map the new pages into the GPU */
+	phy_pages = kbase_get_gpu_phy_pages(reg);
+	ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages,
+			phy_pages + old_pages, delta, reg->flags, kctx->as_nr);
+
+	return ret;
+}
+
+void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 gpu_va_start = reg->start_pfn;
+
+	if (new_pages == old_pages)
+		/* Nothing to do */
+		return;
+
+	unmap_mapping_range(kctx->filp->f_inode->i_mapping,
+			(gpu_va_start + new_pages)<<PAGE_SHIFT,
+			(old_pages - new_pages)<<PAGE_SHIFT, 1);
+}
+
+int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 delta = old_pages - new_pages;
+	int ret = 0;
+
+	ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
+			reg->start_pfn + new_pages, delta, kctx->as_nr);
+
+	return ret;
+}
+
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
+{
+	u64 old_pages;
+	u64 delta;
+	int res = -EINVAL;
+	struct kbase_va_region *reg;
+	bool read_locked = false;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(gpu_addr != 0);
+
+	if (gpu_addr & ~PAGE_MASK) {
+		dev_warn(kctx->kbdev->dev, "kbase:mem_commit: gpu_addr: passed parameter is invalid");
+		return -EINVAL;
+	}
+
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+		goto out_unlock;
+
+	if (0 == (reg->flags & KBASE_REG_GROWABLE))
+		goto out_unlock;
+
+	/* Would overflow the VA region */
+	if (new_pages > reg->nr_pages)
+		goto out_unlock;
+
+	/* can't be mapped more than once on the GPU */
+	if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1)
+		goto out_unlock;
+	/* can't grow regions which are ephemeral */
+	if (reg->flags & KBASE_REG_DONT_NEED)
+		goto out_unlock;
+
+	if (new_pages == reg->gpu_alloc->nents) {
+		/* no change */
+		res = 0;
+		goto out_unlock;
+	}
+
+	old_pages = kbase_reg_current_backed_size(reg);
+	if (new_pages > old_pages) {
+		delta = new_pages - old_pages;
+
+		/*
+		 * No update to the mm so downgrade the writer lock to a read
+		 * lock so other readers aren't blocked after this point.
+		 */
+		downgrade_write(&current->mm->mmap_sem);
+		read_locked = true;
+
+		/* Allocate some more pages */
+		if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) {
+			res = -ENOMEM;
+			goto out_unlock;
+		}
+		if (reg->cpu_alloc != reg->gpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					reg->gpu_alloc, delta) != 0) {
+				res = -ENOMEM;
+				kbase_free_phy_pages_helper(reg->cpu_alloc,
+						delta);
+				goto out_unlock;
+			}
+		}
+
+		/* No update required for CPU mappings, that's done on fault. */
+
+		/* Update GPU mapping. */
+		res = kbase_mem_grow_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* On error free the new pages */
+		if (res) {
+			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				kbase_free_phy_pages_helper(reg->gpu_alloc,
+						delta);
+			res = -ENOMEM;
+			goto out_unlock;
+		}
+	} else {
+		delta = old_pages - new_pages;
+
+		/* Update all CPU mapping(s) */
+		kbase_mem_shrink_cpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* Update the GPU mapping */
+		res = kbase_mem_shrink_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+		if (res) {
+			res = -ENOMEM;
+			goto out_unlock;
+		}
+
+		kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+		if (reg->cpu_alloc != reg->gpu_alloc)
+			kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	if (read_locked)
+		up_read(&current->mm->mmap_sem);
+	else
+		up_write(&current->mm->mmap_sem);
+
+	return res;
+}
+
+static void kbase_cpu_vm_open(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	/* non-atomic as we're under Linux' mm lock */
+	map->count++;
+}
+
+static void kbase_cpu_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+
+	/* non-atomic as we're under Linux' mm lock */
+	if (--map->count)
+		return;
+
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	kbase_gpu_vm_lock(map->kctx);
+
+	if (map->free_on_close) {
+		KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) ==
+				KBASE_REG_ZONE_SAME_VA);
+		/* Avoid freeing memory on the process death which results in
+		 * GPU Page Fault. Memory will be freed in kbase_destroy_context
+		 */
+		if (!(current->flags & PF_EXITING))
+			kbase_mem_free_region(map->kctx, map->region);
+	}
+
+	list_del(&map->mappings_list);
+
+	kbase_gpu_vm_unlock(map->kctx);
+
+	kbase_mem_phy_alloc_put(map->alloc);
+	kfree(map);
+}
+
+KBASE_EXPORT_TEST_API(kbase_cpu_vm_close);
+
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0))
+static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+#else
+static vm_fault_t kbase_cpu_vm_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+#endif
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+	pgoff_t rel_pgoff;
+	size_t i;
+	pgoff_t addr;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	rel_pgoff = vmf->pgoff - map->region->start_pfn;
+
+	kbase_gpu_vm_lock(map->kctx);
+	if (rel_pgoff >= map->alloc->nents)
+		goto locked_bad_fault;
+
+	/* Fault on access to DONT_NEED regions */
+	if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED))
+		goto locked_bad_fault;
+
+	/* insert all valid pages from the fault location */
+	i = rel_pgoff;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	addr = (pgoff_t)((uintptr_t)vmf->virtual_address >> PAGE_SHIFT);
+#else
+	addr = (pgoff_t)(vmf->address >> PAGE_SHIFT);
+#endif
+	while (i < map->alloc->nents && (addr < vma->vm_end >> PAGE_SHIFT)) {
+		int ret = vm_insert_pfn(vma, addr << PAGE_SHIFT,
+		    PFN_DOWN(as_phys_addr_t(map->alloc->pages[i])));
+		if (ret < 0 && ret != -EBUSY)
+			goto locked_bad_fault;
+
+		i++; addr++;
+	}
+
+	kbase_gpu_vm_unlock(map->kctx);
+	/* we resolved it, nothing for VM to do */
+	return VM_FAULT_NOPAGE;
+
+locked_bad_fault:
+	kbase_gpu_vm_unlock(map->kctx);
+	return VM_FAULT_SIGBUS;
+}
+
+const struct vm_operations_struct kbase_vm_ops = {
+	.open  = kbase_cpu_vm_open,
+	.close = kbase_cpu_vm_close,
+	.fault = kbase_cpu_vm_fault
+};
+
+static int kbase_cpu_mmap(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		struct vm_area_struct *vma,
+		void *kaddr,
+		size_t nr_pages,
+		unsigned long aligned_offset,
+		int free_on_close)
+{
+	struct kbase_cpu_mapping *map;
+	struct tagged_addr *page_array;
+	int err = 0;
+	int i;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+
+	if (!map) {
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * VM_DONTCOPY - don't make this mapping available in fork'ed processes
+	 * VM_DONTEXPAND - disable mremap on this region
+	 * VM_IO - disables paging
+	 * VM_DONTDUMP - Don't include in core dumps (3.7 only)
+	 * VM_MIXEDMAP - Support mixing struct page*s and raw pfns.
+	 *               This is needed to support using the dedicated and
+	 *               the OS based memory backends together.
+	 */
+	/*
+	 * This will need updating to propagate coherency flags
+	 * See MIDBASE-1057
+	 */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_ops;
+	vma->vm_private_data = map;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED) &&
+	    (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) {
+		/* We can't map vmalloc'd memory uncached.
+		 * Other memory will have been returned from
+		 * kbase_mem_pool which would be
+		 * suitable for mapping uncached.
+		 */
+		BUG_ON(kaddr);
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	}
+
+	if (!kaddr) {
+		unsigned long addr = vma->vm_start + aligned_offset;
+		u64 start_off = vma->vm_pgoff - reg->start_pfn +
+			(aligned_offset>>PAGE_SHIFT);
+
+		vma->vm_flags |= VM_PFNMAP;
+		for (i = 0; i < nr_pages; i++) {
+			phys_addr_t phys;
+
+			phys = as_phys_addr_t(page_array[i + start_off]);
+			err = vm_insert_pfn(vma, addr, PFN_DOWN(phys));
+			if (WARN_ON(err))
+				break;
+
+			addr += PAGE_SIZE;
+		}
+	} else {
+		WARN_ON(aligned_offset);
+		/* MIXEDMAP so we can vfree the kaddr early and not track it after map time */
+		vma->vm_flags |= VM_MIXEDMAP;
+		/* vmalloc remaping is easy... */
+		err = remap_vmalloc_range(vma, kaddr, 0);
+		WARN_ON(err);
+	}
+
+	if (err) {
+		kfree(map);
+		goto out;
+	}
+
+	map->region = reg;
+	map->free_on_close = free_on_close;
+	map->kctx = kctx;
+	map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->count = 1; /* start with one ref */
+
+	if (reg->flags & KBASE_REG_CPU_CACHED)
+		map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	list_add(&map->mappings_list, &map->alloc->mappings);
+
+ out:
+	return err;
+}
+
+static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr)
+{
+	struct kbase_va_region *new_reg;
+	void *kaddr;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+
+	dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n");
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	kaddr = kbase_mmu_dump(kctx, nr_pages);
+
+	if (!kaddr) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	new_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, nr_pages,
+			KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_va_region;
+	}
+
+	*kmap_addr = kaddr;
+	*reg = new_reg;
+
+	dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n");
+	return 0;
+
+out_no_alloc:
+out_va_region:
+	kbase_free_alloced_region(new_reg);
+out:
+	return err;
+}
+
+
+void kbase_os_mem_map_lock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	down_read(&mm->mmap_sem);
+}
+
+void kbase_os_mem_map_unlock(struct kbase_context *kctx)
+{
+	struct mm_struct *mm = current->mm;
+	(void)kctx;
+	up_read(&mm->mmap_sem);
+}
+
+static int kbasep_reg_mmap(struct kbase_context *kctx,
+			   struct vm_area_struct *vma,
+			   struct kbase_va_region **regm,
+			   size_t *nr_pages, size_t *aligned_offset)
+
+{
+	int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+	struct kbase_va_region *reg;
+	int err = 0;
+
+	*aligned_offset = 0;
+
+	dev_dbg(kctx->kbdev->dev, "in kbasep_reg_mmap\n");
+
+	/* SAME_VA stuff, fetch the right region */
+	reg = kctx->pending_regions[cookie];
+	if (!reg) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if ((reg->flags & KBASE_REG_GPU_NX) && (reg->nr_pages != *nr_pages)) {
+		/* incorrect mmap size */
+		/* leave the cookie for a potential later
+		 * mapping, or to be reclaimed later when the
+		 * context is freed */
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if ((vma->vm_flags & VM_READ && !(reg->flags & KBASE_REG_CPU_RD)) ||
+	    (vma->vm_flags & VM_WRITE && !(reg->flags & KBASE_REG_CPU_WR))) {
+		/* VM flags inconsistent with region flags */
+		err = -EPERM;
+		dev_err(kctx->kbdev->dev, "%s:%d inconsistent VM flags\n",
+							__FILE__, __LINE__);
+		goto out;
+	}
+
+	/* adjust down nr_pages to what we have physically */
+	*nr_pages = kbase_reg_current_backed_size(reg);
+
+	if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset,
+						reg->nr_pages, 1) != 0) {
+		dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__);
+		/* Unable to map in GPU space. */
+		WARN_ON(1);
+		err = -ENOMEM;
+		goto out;
+	}
+	/* no need for the cookie anymore */
+	kctx->pending_regions[cookie] = NULL;
+	kctx->cookies |= (1UL << cookie);
+
+	/*
+	 * Overwrite the offset with the region start_pfn, so we effectively
+	 * map from offset 0 in the region. However subtract the aligned
+	 * offset so that when user space trims the mapping the beginning of
+	 * the trimmed VMA has the correct vm_pgoff;
+	 */
+	vma->vm_pgoff = reg->start_pfn - ((*aligned_offset)>>PAGE_SHIFT);
+out:
+	*regm = reg;
+	dev_dbg(kctx->kbdev->dev, "kbasep_reg_mmap done\n");
+
+	return err;
+}
+
+int kbase_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx = file->private_data;
+	struct kbase_va_region *reg = NULL;
+	void *kaddr = NULL;
+	size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	int err = 0;
+	int free_on_close = 0;
+	struct device *dev = kctx->kbdev->dev;
+	size_t aligned_offset = 0;
+
+	dev_dbg(dev, "kbase_mmap\n");
+
+	if (!(vma->vm_flags & VM_READ))
+		vma->vm_flags &= ~VM_MAYREAD;
+	if (!(vma->vm_flags & VM_WRITE))
+		vma->vm_flags &= ~VM_MAYWRITE;
+
+	if (0 == nr_pages) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!(vma->vm_flags & VM_SHARED)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) {
+		/* The non-mapped tracking helper page */
+		err = kbase_tracking_page_setup(kctx, vma);
+		goto out_unlock;
+	}
+
+	/* if not the MTP, verify that the MTP has been mapped */
+	rcu_read_lock();
+	/* catches both when the special page isn't present or
+	 * when we've forked */
+	if (rcu_dereference(kctx->process_mm) != current->mm) {
+		err = -EINVAL;
+		rcu_read_unlock();
+		goto out_unlock;
+	}
+	rcu_read_unlock();
+
+	switch (vma->vm_pgoff) {
+	case PFN_DOWN(BASEP_MEM_INVALID_HANDLE):
+	case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE):
+		/* Illegal handle for direct map */
+		err = -EINVAL;
+		goto out_unlock;
+	case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
+		/* MMU dump */
+		err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	case PFN_DOWN(BASE_MEM_COOKIE_BASE) ...
+	     PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: {
+		err = kbasep_reg_mmap(kctx, vma, &reg, &nr_pages,
+							&aligned_offset);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		break;
+	}
+	default: {
+		reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+					(u64)vma->vm_pgoff << PAGE_SHIFT);
+
+		if (reg && !(reg->flags & KBASE_REG_FREE)) {
+			/* will this mapping overflow the size of the region? */
+			if (nr_pages > (reg->nr_pages -
+					(vma->vm_pgoff - reg->start_pfn))) {
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+
+			if ((vma->vm_flags & VM_READ &&
+					!(reg->flags & KBASE_REG_CPU_RD)) ||
+					(vma->vm_flags & VM_WRITE &&
+					!(reg->flags & KBASE_REG_CPU_WR))) {
+				/* VM flags inconsistent with region flags */
+				err = -EPERM;
+				dev_err(dev, "%s:%d inconsistent VM flags\n",
+					__FILE__, __LINE__);
+				goto out_unlock;
+			}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+			if (KBASE_MEM_TYPE_IMPORTED_UMM ==
+							reg->cpu_alloc->type) {
+				if (0 != (vma->vm_pgoff - reg->start_pfn)) {
+					err = -EINVAL;
+					dev_warn(dev, "%s:%d attempt to do a partial map in a dma_buf: non-zero offset to dma_buf mapping!\n",
+						__FILE__, __LINE__);
+					goto out_unlock;
+				}
+				err = dma_buf_mmap(
+					reg->cpu_alloc->imported.umm.dma_buf,
+					vma, vma->vm_pgoff - reg->start_pfn);
+				goto out_unlock;
+			}
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+			/* limit what we map to the amount currently backed */
+			if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) {
+				if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents)
+					nr_pages = 0;
+				else
+					nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn);
+			}
+		} else {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+	} /* default */
+	} /* switch */
+
+	err = kbase_cpu_mmap(kctx, reg, vma, kaddr, nr_pages, aligned_offset,
+			free_on_close);
+
+	if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) {
+		/* MMU dump - userspace should now have a reference on
+		 * the pages, so we can now free the kernel mapping */
+		vfree(kaddr);
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+out:
+	if (err)
+		dev_err(dev, "mmap failed %d\n", err);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmap);
+
+void kbase_sync_mem_regions(struct kbase_context *kctx,
+		struct kbase_vmap_struct *map, enum kbase_sync_type dest)
+{
+	size_t i;
+	off_t const offset = map->offset_in_page;
+	size_t const page_count = PFN_UP(offset + map->size);
+
+	/* Sync first page */
+	size_t sz = MIN(((size_t) PAGE_SIZE - offset), map->size);
+	struct tagged_addr cpu_pa = map->cpu_pages[0];
+	struct tagged_addr gpu_pa = map->gpu_pages[0];
+
+	kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz, dest);
+
+	/* Sync middle pages (if any) */
+	for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+		cpu_pa = map->cpu_pages[i];
+		gpu_pa = map->gpu_pages[i];
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE, dest);
+	}
+
+	/* Sync last page (if any) */
+	if (page_count > 1) {
+		cpu_pa = map->cpu_pages[page_count - 1];
+		gpu_pa = map->gpu_pages[page_count - 1];
+		sz = ((offset + map->size - 1) & ~PAGE_MASK) + 1;
+		kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz, dest);
+	}
+}
+
+static int kbase_vmap_phy_pages(struct kbase_context *kctx,
+		struct kbase_va_region *reg, u64 offset_bytes, size_t size,
+		struct kbase_vmap_struct *map)
+{
+	unsigned long page_index;
+	unsigned int offset_in_page = offset_bytes & ~PAGE_MASK;
+	size_t page_count = PFN_UP(offset_in_page + size);
+	struct tagged_addr *page_array;
+	struct page **pages;
+	void *cpu_addr = NULL;
+	pgprot_t prot;
+	size_t i;
+
+	if (!size || !map || !reg->cpu_alloc || !reg->gpu_alloc)
+		return -EINVAL;
+
+	/* check if page_count calculation will wrap */
+	if (size > ((size_t)-1 / PAGE_SIZE))
+		return -EINVAL;
+
+	page_index = offset_bytes >> PAGE_SHIFT;
+
+	/* check if page_index + page_count will wrap */
+	if (-1UL - page_count < page_index)
+		return -EINVAL;
+
+	if (page_index + page_count > kbase_reg_current_backed_size(reg))
+		return -ENOMEM;
+
+	if (reg->flags & KBASE_REG_DONT_NEED)
+		return -EINVAL;
+
+	prot = PAGE_KERNEL;
+	if (!(reg->flags & KBASE_REG_CPU_CACHED)) {
+		/* Map uncached */
+		prot = pgprot_writecombine(prot);
+	}
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+	if (!page_array)
+		return -ENOMEM;
+
+	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	for (i = 0; i < page_count; i++)
+		pages[i] = as_page(page_array[page_index + i]);
+
+	/* Note: enforcing a RO prot_request onto prot is not done, since:
+	 * - CPU-arch-specific integration required
+	 * - kbase_vmap() requires no access checks to be made/enforced */
+
+	cpu_addr = vmap(pages, page_count, VM_MAP, prot);
+
+	kfree(pages);
+
+	if (!cpu_addr)
+		return -ENOMEM;
+
+	map->offset_in_page = offset_in_page;
+	map->cpu_alloc = reg->cpu_alloc;
+	map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index];
+	map->gpu_alloc = reg->gpu_alloc;
+	map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index];
+	map->addr = (void *)((uintptr_t)cpu_addr + offset_in_page);
+	map->size = size;
+	map->sync_needed = ((reg->flags & KBASE_REG_CPU_CACHED) != 0) &&
+		!kbase_mem_is_imported(map->gpu_alloc->type);
+
+	if (map->sync_needed)
+		kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU);
+
+	return 0;
+}
+
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map)
+{
+	struct kbase_va_region *reg;
+	void *addr = NULL;
+	u64 offset_bytes;
+	struct kbase_mem_phy_alloc *cpu_alloc;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+	int err;
+
+	kbase_gpu_vm_lock(kctx);
+
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+			gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE))
+		goto out_unlock;
+
+	/* check access permissions can be satisfied
+	 * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR}
+	 */
+	if ((reg->flags & prot_request) != prot_request)
+		goto out_unlock;
+
+	offset_bytes = gpu_addr - (reg->start_pfn << PAGE_SHIFT);
+	cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	err = kbase_vmap_phy_pages(kctx, reg, offset_bytes, size, map);
+	if (err < 0)
+		goto fail_vmap_phy_pages;
+
+	addr = map->addr;
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return addr;
+
+fail_vmap_phy_pages:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_mem_phy_alloc_put(cpu_alloc);
+	kbase_mem_phy_alloc_put(gpu_alloc);
+
+	return NULL;
+}
+
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map)
+{
+	/* 0 is specified for prot_request to indicate no access checks should
+	 * be made.
+	 *
+	 * As mentioned in kbase_vmap_prot() this means that a kernel-side
+	 * CPU-RO mapping is not enforced to allow this to work */
+	return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map);
+}
+KBASE_EXPORT_TEST_API(kbase_vmap);
+
+static void kbase_vunmap_phy_pages(struct kbase_context *kctx,
+		struct kbase_vmap_struct *map)
+{
+	void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
+	vunmap(addr);
+
+	if (map->sync_needed)
+		kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE);
+
+	map->offset_in_page = 0;
+	map->cpu_pages = NULL;
+	map->gpu_pages = NULL;
+	map->addr = NULL;
+	map->size = 0;
+	map->sync_needed = false;
+}
+
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
+{
+	kbase_vunmap_phy_pages(kctx, map);
+	map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc);
+	map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc);
+}
+KBASE_EXPORT_TEST_API(kbase_vunmap);
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
+{
+	struct mm_struct *mm;
+
+	rcu_read_lock();
+	mm = rcu_dereference(kctx->process_mm);
+	if (mm) {
+		atomic_add(pages, &kctx->nonmapped_pages);
+#ifdef SPLIT_RSS_COUNTING
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+#else
+		spin_lock(&mm->page_table_lock);
+		add_mm_counter(mm, MM_FILEPAGES, pages);
+		spin_unlock(&mm->page_table_lock);
+#endif
+	}
+	rcu_read_unlock();
+}
+
+static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
+{
+	int pages;
+	struct mm_struct *mm;
+
+	spin_lock(&kctx->mm_update_lock);
+	mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
+	if (!mm) {
+		spin_unlock(&kctx->mm_update_lock);
+		return;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, NULL);
+	spin_unlock(&kctx->mm_update_lock);
+	synchronize_rcu();
+
+	pages = atomic_xchg(&kctx->nonmapped_pages, 0);
+#ifdef SPLIT_RSS_COUNTING
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+#else
+	spin_lock(&mm->page_table_lock);
+	add_mm_counter(mm, MM_FILEPAGES, -pages);
+	spin_unlock(&mm->page_table_lock);
+#endif
+}
+
+static void kbase_special_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_context *kctx;
+
+	kctx = vma->vm_private_data;
+	kbasep_os_process_page_usage_drain(kctx);
+}
+
+static const struct vm_operations_struct kbase_vm_special_ops = {
+	.close = kbase_special_vm_close,
+};
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
+{
+	/* check that this is the only tracking page */
+	spin_lock(&kctx->mm_update_lock);
+	if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
+		spin_unlock(&kctx->mm_update_lock);
+		return -EFAULT;
+	}
+
+	rcu_assign_pointer(kctx->process_mm, current->mm);
+
+	spin_unlock(&kctx->mm_update_lock);
+
+	/* no real access */
+	vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_special_ops;
+	vma->vm_private_data = kctx;
+
+	return 0;
+}
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle)
+{
+	int res;
+	void *va;
+	dma_addr_t  dma_pa;
+	struct kbase_va_region *reg;
+	struct tagged_addr *page_array;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	unsigned long attrs = DMA_ATTR_WRITE_COMBINE;
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	u32 pages = ((size - 1) >> PAGE_SHIFT) + 1;
+	u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
+		    BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR;
+	u32 i;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(0 != size);
+	KBASE_DEBUG_ASSERT(0 != pages);
+
+	if (size == 0)
+		goto err;
+
+	/* All the alloc calls return zeroed memory */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
+			     attrs);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
+			     &attrs);
+#else
+	va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL);
+#endif
+	if (!va)
+		goto err;
+
+	/* Store the state so we can free it later. */
+	handle->cpu_va = va;
+	handle->dma_pa = dma_pa;
+	handle->size   = size;
+
+
+	reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, pages,
+			KBASE_REG_ZONE_SAME_VA);
+	if (!reg)
+		goto no_reg;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	if (kbase_update_region_flags(kctx, reg, flags) != 0)
+		goto invalid_flags;
+
+	reg->cpu_alloc = kbase_alloc_create(kctx, pages, KBASE_MEM_TYPE_RAW);
+	if (IS_ERR_OR_NULL(reg->cpu_alloc))
+		goto no_alloc;
+
+	reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	for (i = 0; i < pages; i++)
+		page_array[i] = as_tagged(dma_pa + ((dma_addr_t)i << PAGE_SHIFT));
+
+	reg->cpu_alloc->nents = pages;
+
+	kbase_gpu_vm_lock(kctx);
+	res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1);
+	kbase_gpu_vm_unlock(kctx);
+	if (res)
+		goto no_mmap;
+
+	return va;
+
+no_mmap:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc:
+invalid_flags:
+	kfree(reg);
+no_reg:
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, attrs);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa);
+#endif
+err:
+	return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_va_alloc);
+
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle)
+{
+	struct kbase_va_region *reg;
+	int err;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
+	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+	DEFINE_DMA_ATTRS(attrs);
+#endif
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(handle->cpu_va != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va);
+	KBASE_DEBUG_ASSERT(reg);
+	err = kbase_gpu_munmap(kctx, reg);
+	kbase_gpu_vm_unlock(kctx);
+	KBASE_DEBUG_ASSERT(!err);
+
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+		       handle->cpu_va, handle->dma_pa, DMA_ATTR_WRITE_COMBINE);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+	dma_free_attrs(kctx->kbdev->dev, handle->size,
+			handle->cpu_va, handle->dma_pa, &attrs);
+#else
+	dma_free_writecombine(kctx->kbdev->dev, handle->size,
+				handle->cpu_va, handle->dma_pa);
+#endif
+}
+KBASE_EXPORT_SYMBOL(kbase_va_free);
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
new file mode 100644
index 0000000000000..a8a52a724c226
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
@@ -0,0 +1,447 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_linux.h
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_MEM_LINUX_H_
+#define _KBASE_MEM_LINUX_H_
+
+/** A HWC dump mapping */
+struct kbase_hwc_dma_mapping {
+	void       *cpu_va;
+	dma_addr_t  dma_pa;
+	size_t      size;
+};
+
+/**
+ * kbase_mem_alloc - Create a new allocation for GPU
+ *
+ * @kctx:         The kernel context
+ * @va_pages:     The number of pages of virtual address space to reserve
+ * @commit_pages: The number of physical pages to allocate upfront
+ * @extent:       The number of extra pages to allocate on each GPU fault which
+ *                grows the region.
+ * @flags:        bitmask of BASE_MEM_* flags to convey special requirements &
+ *                properties for the new allocation.
+ * @gpu_va:       Start address of the memory region which was allocated from GPU
+ *                virtual address space.
+ *
+ * Return: 0 on success or error code
+ */
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
+		u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
+		u64 *gpu_va);
+
+/**
+ * kbase_mem_query - Query properties of a GPU memory region
+ *
+ * @kctx:     The kernel context
+ * @gpu_addr: A GPU address contained within the memory region
+ * @query:    The type of query, from KBASE_MEM_QUERY_* flags, which could be
+ *            regarding the amount of backing physical memory allocated so far
+ *            for the region or the size of the region or the flags associated
+ *            with the region.
+ * @out:      Pointer to the location to store the result of query.
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query,
+		u64 *const out);
+
+/**
+ * kbase_mem_import - Import the external memory for use by the GPU
+ *
+ * @kctx:     The kernel context
+ * @type:     Type of external memory
+ * @phandle:  Handle to the external memory interpreted as per the type.
+ * @padding:  Amount of extra VA pages to append to the imported buffer
+ * @gpu_va:   GPU address assigned to the imported external memory
+ * @va_pages: Size of the memory region reserved from the GPU address space
+ * @flags:    bitmask of BASE_MEM_* flags to convey special requirements &
+ *            properties for the new allocation representing the external
+ *            memory.
+ * Return: 0 on success or error code
+ */
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+		void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages,
+		u64 *flags);
+
+/**
+ * kbase_mem_alias - Create a new allocation for GPU, aliasing one or more
+ *                   memory regions
+ *
+ * @kctx:      The kernel context
+ * @flags:     bitmask of BASE_MEM_* flags.
+ * @stride:    Bytes between start of each memory region
+ * @nents:     The number of regions to pack together into the alias
+ * @ai:        Pointer to the struct containing the memory aliasing info
+ * @num_pages: Number of pages the alias will cover
+ *
+ * Return: 0 on failure or otherwise the GPU VA for the alias
+ */
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages);
+
+/**
+ * kbase_mem_flags_change - Change the flags for a memory region
+ *
+ * @kctx:     The kernel context
+ * @gpu_addr: A GPU address contained within the memory region to modify.
+ * @flags:    The new flags to set
+ * @mask:     Mask of the flags, from BASE_MEM_*, to modify.
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask);
+
+/**
+ * kbase_mem_commit - Change the physical backing size of a region
+ *
+ * @kctx: The kernel context
+ * @gpu_addr: Handle to the memory region
+ * @new_pages: Number of physical pages to back the region with
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages);
+
+/**
+ * kbase_mmap - Mmap method, gets invoked when mmap system call is issued on
+ *              device file /dev/malixx.
+ * @file: Pointer to the device file /dev/malixx instance.
+ * @vma:  Pointer to the struct containing the info where the GPU allocation
+ *        will be mapped in virtual address space of CPU.
+ *
+ * Return: 0 on success or error code
+ */
+int kbase_mmap(struct file *file, struct vm_area_struct *vma);
+
+/**
+ * kbase_mem_evictable_init - Initialize the Ephemeral memory eviction
+ * mechanism.
+ * @kctx: The kbase context to initialize.
+ *
+ * Return: Zero on success or -errno on failure.
+ */
+int kbase_mem_evictable_init(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction
+ * mechanism.
+ * @kctx: The kbase context to de-initialize.
+ */
+void kbase_mem_evictable_deinit(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the grow
+ * @old_pages: The number of pages before the grow
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Expand the GPU mapping to encompass the new psychical pages which have
+ * been added to the allocation.
+ *
+ * Note: Caller must be holding the region lock.
+ */
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_evictable_make - Make a physical allocation eligible for eviction
+ * @gpu_alloc: The physical allocation to make evictable
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Take the provided region and make all the physical pages within it
+ * reclaimable by the kernel, updating the per-process VM stats as well.
+ * Remove any CPU mappings (as these can't be removed in the shrinker callback
+ * as mmap_sem might already be taken) but leave the GPU mapping intact as
+ * and until the shrinker reclaims the allocation.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc);
+
+/**
+ * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for
+ * eviction.
+ * @alloc: The physical allocation to remove eviction eligibility from.
+ *
+ * Return: True if the allocation had its backing restored and false if
+ * it hasn't.
+ *
+ * Make the physical pages in the region no longer reclaimable and update the
+ * per-process stats, if the shrinker has already evicted the memory then
+ * re-allocate it if the region is still alive.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc);
+
+struct kbase_vmap_struct {
+	off_t offset_in_page;
+	struct kbase_mem_phy_alloc *cpu_alloc;
+	struct kbase_mem_phy_alloc *gpu_alloc;
+	struct tagged_addr *cpu_pages;
+	struct tagged_addr *gpu_pages;
+	void *addr;
+	size_t size;
+	bool sync_needed;
+};
+
+
+/**
+ * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the
+ * requested access permissions are supported
+ * @kctx:         Context the VA range belongs to
+ * @gpu_addr:     Start address of VA range
+ * @size:         Size of VA range
+ * @prot_request: Flags indicating how the caller will then access the memory
+ * @map:          Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until an kbase_vunmap()
+ *
+ * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check
+ * whether the region should allow the intended access, and return an error if
+ * disallowed. This is essential for security of imported memory, particularly
+ * a user buf from SHM mapped into the process as RO. In that case, write
+ * access must be checked if the intention is for kernel to write to the
+ * memory.
+ *
+ * The checks are also there to help catch access errors on memory where
+ * security is not a concern: imported memory that is always RW, and memory
+ * that was allocated and owned by the process attached to @kctx. In this case,
+ * it helps to identify memory that was was mapped with the wrong access type.
+ *
+ * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases
+ * where either the security of memory is solely dependent on those flags, or
+ * when userspace code was expecting only the GPU to access the memory (e.g. HW
+ * workarounds).
+ *
+ * All cache maintenance operations shall be ignored if the
+ * memory region has been imported.
+ *
+ */
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vmap - Map a GPU VA range into the kernel safely
+ * @kctx:     Context the VA range belongs to
+ * @gpu_addr: Start address of VA range
+ * @size:     Size of VA range
+ * @map:      Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until an kbase_vunmap()
+ *
+ * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no
+ * checks to ensure the security of e.g. imported user bufs from RO SHM.
+ *
+ * Note: All cache maintenance operations shall be ignored if the memory region
+ * has been imported.
+ */
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vunmap - Unmap a GPU VA range from the kernel
+ * @kctx: Context the VA range belongs to
+ * @map:  Structure describing the mapping from the corresponding kbase_vmap()
+ *        call
+ *
+ * Unmaps a GPU VA range from the kernel, given its @map structure obtained
+ * from kbase_vmap(). Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * The reference taken on pages during kbase_vmap() is released.
+ *
+ * Note: All cache maintenance operations shall be ignored if the memory region
+ * has been imported.
+ */
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map);
+
+/** @brief Allocate memory from kernel space and map it onto the GPU
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param size   The size of the allocation in bytes
+ * @param handle An opaque structure used to contain the state needed to free the memory
+ * @return the VA for kernel space and GPU MMU
+ */
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle);
+
+/** @brief Free/unmap memory allocated by kbase_va_alloc
+ *
+ * @param kctx   The context used for the allocation/mapping
+ * @param handle An opaque structure returned by the kbase_va_alloc function.
+ */
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle);
+
+extern const struct vm_operations_struct kbase_vm_ops;
+
+/**
+ * kbase_sync_mem_regions - Perform the cache maintenance for the kernel mode
+ *                          CPU mapping.
+ * @kctx: Context the CPU mapping belongs to.
+ * @map:  Structure describing the CPU mapping, setup previously by the
+ *        kbase_vmap() call.
+ * @dest: Indicates the type of maintenance required (i.e. flush or invalidate)
+ *
+ * Note: The caller shall ensure that CPU mapping is not revoked & remains
+ * active whilst the maintenance is in progress.
+ */
+void kbase_sync_mem_regions(struct kbase_context *kctx,
+		struct kbase_vmap_struct *map, enum kbase_sync_type dest);
+
+/**
+ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Shrink (or completely remove) all CPU mappings which reference the shrunk
+ * part of the allocation.
+ */
+void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region or NULL if there isn't one
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, negative -errno on error
+ *
+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
+ * itself is unmodified as we still need to reserve the VA, only the page tables
+ * will be modified by this function.
+ */
+int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_phy_alloc_mapping_term - Terminate the kernel side mapping of a
+ *                                physical allocation
+ * @kctx:  The kernel base context associated with the mapping
+ * @alloc: Pointer to the allocation to terminate
+ *
+ * This function will unmap the kernel mapping, and free any structures used to
+ * track it.
+ */
+void kbase_phy_alloc_mapping_term(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_phy_alloc_mapping_get - Get a kernel-side CPU pointer to the permanent
+ *                               mapping of a physical allocation
+ * @kctx:             The kernel base context @gpu_addr will be looked up in
+ * @gpu_addr:         The gpu address to lookup for the kernel-side CPU mapping
+ * @out_kern_mapping: Pointer to storage for a struct kbase_vmap_struct pointer
+ *                    which will be used for a call to
+ *                    kbase_phy_alloc_mapping_put()
+ *
+ * Return: Pointer to a kernel-side accessible location that directly
+ *         corresponds to @gpu_addr, or NULL on failure
+ *
+ * Looks up @gpu_addr to retrieve the CPU pointer that can be used to access
+ * that location kernel-side. Only certain kinds of memory have a permanent
+ * kernel mapping, refer to the internal functions
+ * kbase_reg_needs_kernel_mapping() and kbase_phy_alloc_mapping_init() for more
+ * information.
+ *
+ * If this function succeeds, a CPU access to the returned pointer will access
+ * the actual location represented by @gpu_addr. That is, the return value does
+ * not require any offset added to it to access the location specified in
+ * @gpu_addr
+ *
+ * The client must take care to either apply any necessary sync operations when
+ * accessing the data, or ensure that the enclosing region was coherent with
+ * the GPU, or uncached in the CPU.
+ *
+ * The refcount on the physical allocations backing the region are taken, so
+ * that they do not disappear whilst the client is accessing it. Once the
+ * client has finished accessing the memory, it must be released with a call to
+ * kbase_phy_alloc_mapping_put()
+ *
+ * Whilst this is expected to execute quickly (the mapping was already setup
+ * when the physical allocation was created), the call is not IRQ-safe due to
+ * the region lookup involved.
+ *
+ * An error code may indicate that:
+ * - a userside process has freed the allocation, and so @gpu_addr is no longer
+ *   valid
+ * - the region containing @gpu_addr does not support a permanent kernel mapping
+ */
+void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, u64 gpu_addr,
+		struct kbase_vmap_struct **out_kern_mapping);
+
+/**
+ * kbase_phy_alloc_mapping_put - Put a reference to the kernel-side mapping of a
+ *                               physical allocation
+ * @kctx:         The kernel base context associated with the mapping
+ * @kern_mapping: Pointer to a struct kbase_phy_alloc_mapping pointer obtained
+ *                from a call to kbase_phy_alloc_mapping_get()
+ *
+ * Releases the reference to the allocations backing @kern_mapping that was
+ * obtained through a call to kbase_phy_alloc_mapping_get(). This must be used
+ * when the client no longer needs to access the kernel-side CPU pointer.
+ *
+ * If this was the last reference on the underlying physical allocations, they
+ * will go through the normal allocation free steps, which also includes an
+ * unmap of the permanent kernel mapping for those allocations.
+ *
+ * Due to these operations, the function is not IRQ-safe. However it is
+ * expected to execute quickly in the normal case, i.e. when the region holding
+ * the physical allocation is still present.
+ */
+void kbase_phy_alloc_mapping_put(struct kbase_context *kctx,
+		struct kbase_vmap_struct *kern_mapping);
+
+#endif				/* _KBASE_MEM_LINUX_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
new file mode 100644
index 0000000000000..70116030f233e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
@@ -0,0 +1,166 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _KBASE_MEM_LOWLEVEL_H
+#define _KBASE_MEM_LOWLEVEL_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/dma-mapping.h>
+
+/**
+ * @brief Flags for kbase_phy_allocator_pages_alloc
+ */
+#define KBASE_PHY_PAGES_FLAG_DEFAULT (0)	/** Default allocation flag */
+#define KBASE_PHY_PAGES_FLAG_CLEAR   (1 << 0)	/** Clear the pages after allocation */
+#define KBASE_PHY_PAGES_FLAG_POISON  (1 << 1)	/** Fill the memory with a poison value */
+
+#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON)
+
+#define KBASE_PHY_PAGES_POISON_VALUE  0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */
+
+enum kbase_sync_type {
+	KBASE_SYNC_TO_CPU,
+	KBASE_SYNC_TO_DEVICE
+};
+
+struct tagged_addr { phys_addr_t tagged_addr; };
+
+#define HUGE_PAGE    (1u << 0)
+#define HUGE_HEAD    (1u << 1)
+#define FROM_PARTIAL (1u << 2)
+
+/*
+ * Note: if macro for converting physical address to page is not defined
+ * in the kernel itself, it is defined hereby. This is to avoid build errors
+ * which are reported during builds for some architectures.
+ */
+#ifndef phys_to_page
+#define phys_to_page(phys)	(pfn_to_page((phys) >> PAGE_SHIFT))
+#endif
+
+/**
+ * as_phys_addr_t - Retrieve the physical address from tagged address by
+ *                  masking the lower order 12 bits.
+ * @t: tagged address to be translated.
+ *
+ * Return: physical address corresponding to tagged address.
+ */
+static inline phys_addr_t as_phys_addr_t(struct tagged_addr t)
+{
+	return t.tagged_addr & PAGE_MASK;
+}
+
+/**
+ * as_page - Retrieve the struct page from a tagged address
+ * @t: tagged address to be translated.
+ *
+ * Return: pointer to struct page corresponding to tagged address.
+ */
+static inline struct page *as_page(struct tagged_addr t)
+{
+	return phys_to_page(as_phys_addr_t(t));
+}
+
+/**
+ * as_tagged - Convert the physical address to tagged address type though
+ *             there is no tag info present, the lower order 12 bits will be 0
+ * @phys: physical address to be converted to tagged type
+ *
+ * This is used for 4KB physical pages allocated by the Driver or imported pages
+ * and is needed as physical pages tracking object stores the reference for
+ * physical pages using tagged address type in lieu of the type generally used
+ * for physical addresses.
+ *
+ * Return: address of tagged address type.
+ */
+static inline struct tagged_addr as_tagged(phys_addr_t phys)
+{
+	struct tagged_addr t;
+
+	t.tagged_addr = phys & PAGE_MASK;
+	return t;
+}
+
+/**
+ * as_tagged_tag - Form the tagged address by storing the tag or metadata in the
+ *                 lower order 12 bits of physial address
+ * @phys: physical address to be converted to tagged address
+ * @tag:  tag to be stored along with the physical address.
+ *
+ * The tag info is used while freeing up the pages
+ *
+ * Return: tagged address storing physical address & tag.
+ */
+static inline struct tagged_addr as_tagged_tag(phys_addr_t phys, int tag)
+{
+	struct tagged_addr t;
+
+	t.tagged_addr = (phys & PAGE_MASK) | (tag & ~PAGE_MASK);
+	return t;
+}
+
+/**
+ * is_huge - Check if the physical page is one of the 512 4KB pages of the
+ *           large page which was not split to be used partially
+ * @t: tagged address storing the tag in the lower order bits.
+ *
+ * Return: true if page belongs to large page, or false
+ */
+static inline bool is_huge(struct tagged_addr t)
+{
+	return t.tagged_addr & HUGE_PAGE;
+}
+
+/**
+ * is_huge_head - Check if the physical page is the first 4KB page of the
+ *                512 4KB pages within a large page which was not split
+ *                to be used partially
+ * @t: tagged address storing the tag in the lower order bits.
+ *
+ * Return: true if page is the first page of a large page, or false
+ */
+static inline bool is_huge_head(struct tagged_addr t)
+{
+	int mask = HUGE_HEAD | HUGE_PAGE;
+
+	return mask == (t.tagged_addr & mask);
+}
+
+/**
+ * is_partial - Check if the physical page is one of the 512 pages of the
+ *              large page which was split in 4KB pages to be used
+ *              partially for allocations >= 2 MB in size.
+ * @t: tagged address storing the tag in the lower order bits.
+ *
+ * Return: true if page was taken from large page used partially, or false
+ */
+static inline bool is_partial(struct tagged_addr t)
+{
+	return t.tagged_addr & FROM_PARTIAL;
+}
+
+#endif /* _KBASE_LOWLEVEL_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
new file mode 100644
index 0000000000000..0f91be17a81b7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
@@ -0,0 +1,842 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/shrinker.h>
+#include <linux/atomic.h>
+#include <linux/version.h>
+
+#define pool_dbg(pool, format, ...) \
+	dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format,	\
+		(pool->next_pool) ? "kctx" : "kbdev",	\
+		kbase_mem_pool_size(pool),	\
+		kbase_mem_pool_max_size(pool),	\
+		##__VA_ARGS__)
+
+#define NOT_DIRTY false
+#define NOT_RECLAIMED false
+
+static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
+{
+	ssize_t max_size = kbase_mem_pool_max_size(pool);
+	ssize_t cur_size = kbase_mem_pool_size(pool);
+
+	return max(max_size - cur_size, (ssize_t)0);
+}
+
+static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool);
+}
+
+static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool)
+{
+	return kbase_mem_pool_size(pool) == 0;
+}
+
+static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_add(&p->lru, &pool->page_list);
+	pool->cur_size++;
+
+	pool_dbg(pool, "added page\n");
+}
+
+static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_locked(pool, p);
+	kbase_mem_pool_unlock(pool);
+}
+
+static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	lockdep_assert_held(&pool->pool_lock);
+
+	list_splice(page_list, &pool->page_list);
+	pool->cur_size += nr_pages;
+
+	pool_dbg(pool, "added %zu pages\n", nr_pages);
+}
+
+static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool,
+		struct list_head *page_list, size_t nr_pages)
+{
+	kbase_mem_pool_lock(pool);
+	kbase_mem_pool_add_list_locked(pool, page_list, nr_pages);
+	kbase_mem_pool_unlock(pool);
+}
+
+static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (kbase_mem_pool_is_empty(pool))
+		return NULL;
+
+	p = list_first_entry(&pool->page_list, struct page, lru);
+	list_del_init(&p->lru);
+	pool->cur_size--;
+
+	pool_dbg(pool, "removed page\n");
+
+	return p;
+}
+
+static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	kbase_mem_pool_lock(pool);
+	p = kbase_mem_pool_remove_locked(pool);
+	kbase_mem_pool_unlock(pool);
+
+	return p;
+}
+
+static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+	dma_sync_single_for_device(dev, kbase_dma_addr(p),
+			(PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL);
+}
+
+static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	int i;
+
+	for (i = 0; i < (1U << pool->order); i++)
+		clear_highpage(p+i);
+
+	kbase_mem_pool_sync_page(pool, p);
+}
+
+static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool,
+		struct page *p)
+{
+	/* Zero page before spilling */
+	kbase_mem_pool_zero_page(next_pool, p);
+
+	kbase_mem_pool_add(next_pool, p);
+}
+
+struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+	gfp_t gfp;
+	struct device *dev = pool->kbdev->dev;
+	dma_addr_t dma_addr;
+	int i;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \
+	LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+	/* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+	gfp = GFP_USER | __GFP_ZERO;
+#else
+	gfp = GFP_HIGHUSER | __GFP_ZERO;
+#endif
+
+	/* don't warn on higer order failures */
+	if (pool->order)
+		gfp |= __GFP_NOWARN;
+
+	p = alloc_pages(gfp, pool->order);
+	if (!p)
+		return NULL;
+
+	dma_addr = dma_map_page(dev, p, 0, (PAGE_SIZE << pool->order),
+				DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, dma_addr)) {
+		__free_pages(p, pool->order);
+		return NULL;
+	}
+
+	WARN_ON(dma_addr != page_to_phys(p));
+	for (i = 0; i < (1u << pool->order); i++)
+		kbase_set_dma_addr(p+i, dma_addr + PAGE_SIZE * i);
+
+	return p;
+}
+
+static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool,
+		struct page *p)
+{
+	struct device *dev = pool->kbdev->dev;
+	dma_addr_t dma_addr = kbase_dma_addr(p);
+	int i;
+
+	dma_unmap_page(dev, dma_addr, (PAGE_SIZE << pool->order),
+		       DMA_BIDIRECTIONAL);
+	for (i = 0; i < (1u << pool->order); i++)
+		kbase_clear_dma_addr(p+i);
+	__free_pages(p, pool->order);
+
+	pool_dbg(pool, "freed page to kernel\n");
+}
+
+static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	struct page *p;
+	size_t i;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) {
+		p = kbase_mem_pool_remove_locked(pool);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	return i;
+}
+
+static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
+		size_t nr_to_shrink)
+{
+	size_t nr_freed;
+
+	kbase_mem_pool_lock(pool);
+	nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	kbase_mem_pool_unlock(pool);
+
+	return nr_freed;
+}
+
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
+		size_t nr_to_grow)
+{
+	struct page *p;
+	size_t i;
+
+	kbase_mem_pool_lock(pool);
+
+	pool->dont_reclaim = true;
+	for (i = 0; i < nr_to_grow; i++) {
+		if (pool->dying) {
+			pool->dont_reclaim = false;
+			kbase_mem_pool_shrink_locked(pool, nr_to_grow);
+			kbase_mem_pool_unlock(pool);
+
+			return -ENOMEM;
+		}
+		kbase_mem_pool_unlock(pool);
+
+		p = kbase_mem_alloc_page(pool);
+		if (!p) {
+			kbase_mem_pool_lock(pool);
+			pool->dont_reclaim = false;
+			kbase_mem_pool_unlock(pool);
+
+			return -ENOMEM;
+		}
+
+		kbase_mem_pool_lock(pool);
+		kbase_mem_pool_add_locked(pool, p);
+	}
+	pool->dont_reclaim = false;
+	kbase_mem_pool_unlock(pool);
+
+	return 0;
+}
+
+void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
+{
+	size_t cur_size;
+	int err = 0;
+
+	cur_size = kbase_mem_pool_size(pool);
+
+	if (new_size > pool->max_size)
+		new_size = pool->max_size;
+
+	if (new_size < cur_size)
+		kbase_mem_pool_shrink(pool, cur_size - new_size);
+	else if (new_size > cur_size)
+		err = kbase_mem_pool_grow(pool, new_size - cur_size);
+
+	if (err) {
+		size_t grown_size = kbase_mem_pool_size(pool);
+
+		dev_warn(pool->kbdev->dev,
+			 "Mem pool not grown to the required size of %zu bytes, grown for additional %zu bytes instead!\n",
+			 (new_size - cur_size), (grown_size - cur_size));
+	}
+}
+
+void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size)
+{
+	size_t cur_size;
+	size_t nr_to_shrink;
+
+	kbase_mem_pool_lock(pool);
+
+	pool->max_size = max_size;
+
+	cur_size = kbase_mem_pool_size(pool);
+	if (max_size < cur_size) {
+		nr_to_shrink = cur_size - max_size;
+		kbase_mem_pool_shrink_locked(pool, nr_to_shrink);
+	}
+
+	kbase_mem_pool_unlock(pool);
+}
+
+
+static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+	size_t pool_size;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+
+	kbase_mem_pool_lock(pool);
+	if (pool->dont_reclaim && !pool->dying) {
+		kbase_mem_pool_unlock(pool);
+		return 0;
+	}
+	pool_size = kbase_mem_pool_size(pool);
+	kbase_mem_pool_unlock(pool);
+
+	return pool_size;
+}
+
+static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_mem_pool *pool;
+	unsigned long freed;
+
+	pool = container_of(s, struct kbase_mem_pool, reclaim);
+
+	kbase_mem_pool_lock(pool);
+	if (pool->dont_reclaim && !pool->dying) {
+		kbase_mem_pool_unlock(pool);
+		return 0;
+	}
+
+	pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan);
+
+	freed = kbase_mem_pool_shrink_locked(pool, sc->nr_to_scan);
+
+	kbase_mem_pool_unlock(pool);
+
+	pool_dbg(pool, "reclaim freed %ld pages\n", freed);
+
+	return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_pool_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_pool_reclaim_count_objects(s, sc);
+
+	return kbase_mem_pool_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_pool_init(struct kbase_mem_pool *pool,
+		size_t max_size,
+		size_t order,
+		struct kbase_device *kbdev,
+		struct kbase_mem_pool *next_pool)
+{
+	pool->cur_size = 0;
+	pool->max_size = max_size;
+	pool->order = order;
+	pool->kbdev = kbdev;
+	pool->next_pool = next_pool;
+	pool->dying = false;
+
+	spin_lock_init(&pool->pool_lock);
+	INIT_LIST_HEAD(&pool->page_list);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink;
+#else
+	pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects;
+	pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects;
+#endif
+	pool->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1 :
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	pool->reclaim.batch = 0;
+#endif
+	register_shrinker(&pool->reclaim);
+
+	pool_dbg(pool, "initialized\n");
+
+	return 0;
+}
+
+void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool)
+{
+	kbase_mem_pool_lock(pool);
+	pool->dying = true;
+	kbase_mem_pool_unlock(pool);
+}
+
+void kbase_mem_pool_term(struct kbase_mem_pool *pool)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p, *tmp;
+	size_t nr_to_spill = 0;
+	LIST_HEAD(spill_list);
+	LIST_HEAD(free_list);
+	int i;
+
+	pool_dbg(pool, "terminate()\n");
+
+	unregister_shrinker(&pool->reclaim);
+
+	kbase_mem_pool_lock(pool);
+	pool->max_size = 0;
+
+	if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool (may overspill) */
+		nr_to_spill = kbase_mem_pool_capacity(next_pool);
+		nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill);
+
+		/* Zero pages first without holding the next_pool lock */
+		for (i = 0; i < nr_to_spill; i++) {
+			p = kbase_mem_pool_remove_locked(pool);
+			list_add(&p->lru, &spill_list);
+		}
+	}
+
+	while (!kbase_mem_pool_is_empty(pool)) {
+		/* Free remaining pages to kernel */
+		p = kbase_mem_pool_remove_locked(pool);
+		list_add(&p->lru, &free_list);
+	}
+
+	kbase_mem_pool_unlock(pool);
+
+	if (next_pool && nr_to_spill) {
+		list_for_each_entry(p, &spill_list, lru)
+			kbase_mem_pool_zero_page(pool, p);
+
+		/* Add new page list to next_pool */
+		kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill);
+
+		pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill);
+	}
+
+	list_for_each_entry_safe(p, tmp, &free_list, lru) {
+		list_del_init(&p->lru);
+		kbase_mem_pool_free_page(pool, p);
+	}
+
+	pool_dbg(pool, "terminated\n");
+}
+
+struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	do {
+		pool_dbg(pool, "alloc()\n");
+		p = kbase_mem_pool_remove(pool);
+
+		if (p)
+			return p;
+
+		pool = pool->next_pool;
+	} while (pool);
+
+	return NULL;
+}
+
+struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool)
+{
+	struct page *p;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	pool_dbg(pool, "alloc_locked()\n");
+	p = kbase_mem_pool_remove_locked(pool);
+
+	if (p)
+		return p;
+
+	return NULL;
+}
+
+void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+
+	pool_dbg(pool, "free()\n");
+
+	if (!kbase_mem_pool_is_full(pool)) {
+		/* Add to our own pool */
+		if (dirty)
+			kbase_mem_pool_sync_page(pool, p);
+
+		kbase_mem_pool_add(pool, p);
+	} else if (next_pool && !kbase_mem_pool_is_full(next_pool)) {
+		/* Spill to next pool */
+		kbase_mem_pool_spill(next_pool, p);
+	} else {
+		/* Free page */
+		kbase_mem_pool_free_page(pool, p);
+	}
+}
+
+void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
+		bool dirty)
+{
+	pool_dbg(pool, "free_locked()\n");
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (!kbase_mem_pool_is_full(pool)) {
+		/* Add to our own pool */
+		if (dirty)
+			kbase_mem_pool_sync_page(pool, p);
+
+		kbase_mem_pool_add_locked(pool, p);
+	} else {
+		/* Free page */
+		kbase_mem_pool_free_page(pool, p);
+	}
+}
+
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
+		struct tagged_addr *pages, bool partial_allowed)
+{
+	struct page *p;
+	size_t nr_from_pool;
+	size_t i = 0;
+	int err = -ENOMEM;
+	size_t nr_pages_internal;
+
+	nr_pages_internal = nr_4k_pages / (1u << (pool->order));
+
+	if (nr_pages_internal * (1u << pool->order) != nr_4k_pages)
+		return -EINVAL;
+
+	pool_dbg(pool, "alloc_pages(4k=%zu):\n", nr_4k_pages);
+	pool_dbg(pool, "alloc_pages(internal=%zu):\n", nr_pages_internal);
+
+	/* Get pages from this pool */
+	kbase_mem_pool_lock(pool);
+	nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool));
+	while (nr_from_pool--) {
+		int j;
+		p = kbase_mem_pool_remove_locked(pool);
+		if (pool->order) {
+			pages[i++] = as_tagged_tag(page_to_phys(p),
+						   HUGE_HEAD | HUGE_PAGE);
+			for (j = 1; j < (1u << pool->order); j++)
+				pages[i++] = as_tagged_tag(page_to_phys(p) +
+							   PAGE_SIZE * j,
+							   HUGE_PAGE);
+		} else {
+			pages[i++] = as_tagged(page_to_phys(p));
+		}
+	}
+	kbase_mem_pool_unlock(pool);
+
+	if (i != nr_4k_pages && pool->next_pool) {
+		/* Allocate via next pool */
+		err = kbase_mem_pool_alloc_pages(pool->next_pool,
+				nr_4k_pages - i, pages + i, partial_allowed);
+
+		if (err < 0)
+			goto err_rollback;
+
+		i += err;
+	} else {
+		/* Get any remaining pages from kernel */
+		while (i != nr_4k_pages) {
+			p = kbase_mem_alloc_page(pool);
+			if (!p) {
+				if (partial_allowed)
+					goto done;
+				else
+					goto err_rollback;
+			}
+
+			if (pool->order) {
+				int j;
+
+				pages[i++] = as_tagged_tag(page_to_phys(p),
+							   HUGE_PAGE |
+							   HUGE_HEAD);
+				for (j = 1; j < (1u << pool->order); j++) {
+					phys_addr_t phys;
+
+					phys = page_to_phys(p) + PAGE_SIZE * j;
+					pages[i++] = as_tagged_tag(phys,
+								   HUGE_PAGE);
+				}
+			} else {
+				pages[i++] = as_tagged(page_to_phys(p));
+			}
+		}
+	}
+
+done:
+	pool_dbg(pool, "alloc_pages(%zu) done\n", i);
+	return i;
+
+err_rollback:
+	kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED);
+	return err;
+}
+
+int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_4k_pages, struct tagged_addr *pages)
+{
+	struct page *p;
+	size_t i;
+	size_t nr_pages_internal;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	nr_pages_internal = nr_4k_pages / (1u << (pool->order));
+
+	if (nr_pages_internal * (1u << pool->order) != nr_4k_pages)
+		return -EINVAL;
+
+	pool_dbg(pool, "alloc_pages_locked(4k=%zu):\n", nr_4k_pages);
+	pool_dbg(pool, "alloc_pages_locked(internal=%zu):\n",
+			nr_pages_internal);
+
+	if (kbase_mem_pool_size(pool) < nr_pages_internal) {
+		pool_dbg(pool, "Failed alloc\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < nr_pages_internal; i++) {
+		int j;
+
+		p = kbase_mem_pool_remove_locked(pool);
+		if (pool->order) {
+			*pages++ = as_tagged_tag(page_to_phys(p),
+						   HUGE_HEAD | HUGE_PAGE);
+			for (j = 1; j < (1u << pool->order); j++) {
+				*pages++ = as_tagged_tag(page_to_phys(p) +
+							   PAGE_SIZE * j,
+							   HUGE_PAGE);
+			}
+		} else {
+			*pages++ = as_tagged(page_to_phys(p));
+		}
+	}
+
+	return nr_4k_pages;
+}
+
+static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool,
+				     size_t nr_pages, struct tagged_addr *pages,
+				     bool zero, bool sync)
+{
+	struct page *p;
+	size_t nr_to_pool = 0;
+	LIST_HEAD(new_page_list);
+	size_t i;
+
+	if (!nr_pages)
+		return;
+
+	pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n",
+			nr_pages, zero, sync);
+
+	/* Zero/sync pages first without holding the pool lock */
+	for (i = 0; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge_head(pages[i]) || !is_huge(pages[i])) {
+			p = as_page(pages[i]);
+			if (zero)
+				kbase_mem_pool_zero_page(pool, p);
+			else if (sync)
+				kbase_mem_pool_sync_page(pool, p);
+
+			list_add(&p->lru, &new_page_list);
+			nr_to_pool++;
+		}
+		pages[i] = as_tagged(0);
+	}
+
+	/* Add new page list to pool */
+	kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool);
+
+	pool_dbg(pool, "add_array(%zu) added %zu pages\n",
+			nr_pages, nr_to_pool);
+}
+
+static void kbase_mem_pool_add_array_locked(struct kbase_mem_pool *pool,
+		size_t nr_pages, struct tagged_addr *pages,
+		bool zero, bool sync)
+{
+	struct page *p;
+	size_t nr_to_pool = 0;
+	LIST_HEAD(new_page_list);
+	size_t i;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	if (!nr_pages)
+		return;
+
+	pool_dbg(pool, "add_array_locked(%zu, zero=%d, sync=%d):\n",
+			nr_pages, zero, sync);
+
+	/* Zero/sync pages first */
+	for (i = 0; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge_head(pages[i]) || !is_huge(pages[i])) {
+			p = as_page(pages[i]);
+			if (zero)
+				kbase_mem_pool_zero_page(pool, p);
+			else if (sync)
+				kbase_mem_pool_sync_page(pool, p);
+
+			list_add(&p->lru, &new_page_list);
+			nr_to_pool++;
+		}
+		pages[i] = as_tagged(0);
+	}
+
+	/* Add new page list to pool */
+	kbase_mem_pool_add_list_locked(pool, &new_page_list, nr_to_pool);
+
+	pool_dbg(pool, "add_array_locked(%zu) added %zu pages\n",
+			nr_pages, nr_to_pool);
+}
+
+void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+		struct tagged_addr *pages, bool dirty, bool reclaimed)
+{
+	struct kbase_mem_pool *next_pool = pool->next_pool;
+	struct page *p;
+	size_t nr_to_pool;
+	LIST_HEAD(to_pool_list);
+	size_t i = 0;
+
+	pool_dbg(pool, "free_pages(%zu):\n", nr_pages);
+
+	if (!reclaimed) {
+		/* Add to this pool */
+		nr_to_pool = kbase_mem_pool_capacity(pool);
+		nr_to_pool = min(nr_pages, nr_to_pool);
+
+		kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty);
+
+		i += nr_to_pool;
+
+		if (i != nr_pages && next_pool) {
+			/* Spill to next pool (may overspill) */
+			nr_to_pool = kbase_mem_pool_capacity(next_pool);
+			nr_to_pool = min(nr_pages - i, nr_to_pool);
+
+			kbase_mem_pool_add_array(next_pool, nr_to_pool,
+					pages + i, true, dirty);
+			i += nr_to_pool;
+		}
+	}
+
+	/* Free any remaining pages to kernel */
+	for (; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge(pages[i]) && !is_huge_head(pages[i])) {
+			pages[i] = as_tagged(0);
+			continue;
+		}
+
+		p = as_page(pages[i]);
+
+		kbase_mem_pool_free_page(pool, p);
+		pages[i] = as_tagged(0);
+	}
+
+	pool_dbg(pool, "free_pages(%zu) done\n", nr_pages);
+}
+
+
+void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool,
+		size_t nr_pages, struct tagged_addr *pages, bool dirty,
+		bool reclaimed)
+{
+	struct page *p;
+	size_t nr_to_pool;
+	LIST_HEAD(to_pool_list);
+	size_t i = 0;
+
+	lockdep_assert_held(&pool->pool_lock);
+
+	pool_dbg(pool, "free_pages_locked(%zu):\n", nr_pages);
+
+	if (!reclaimed) {
+		/* Add to this pool */
+		nr_to_pool = kbase_mem_pool_capacity(pool);
+		nr_to_pool = min(nr_pages, nr_to_pool);
+
+		kbase_mem_pool_add_array_locked(pool, nr_pages, pages, false,
+				dirty);
+
+		i += nr_to_pool;
+	}
+
+	/* Free any remaining pages to kernel */
+	for (; i < nr_pages; i++) {
+		if (unlikely(!as_phys_addr_t(pages[i])))
+			continue;
+
+		if (is_huge(pages[i]) && !is_huge_head(pages[i])) {
+			pages[i] = as_tagged(0);
+			continue;
+		}
+
+		p = as_page(pages[i]);
+
+		kbase_mem_pool_free_page(pool, p);
+		pages[i] = as_tagged(0);
+	}
+
+	pool_dbg(pool, "free_pages_locked(%zu) done\n", nr_pages);
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
new file mode 100644
index 0000000000000..4b4eeb32d2c1f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
@@ -0,0 +1,93 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <mali_kbase_mem_pool_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+static int kbase_mem_pool_debugfs_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_trim(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_size_fops,
+		kbase_mem_pool_debugfs_size_get,
+		kbase_mem_pool_debugfs_size_set,
+		"%llu\n");
+
+static int kbase_mem_pool_debugfs_max_size_get(void *data, u64 *val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	*val = kbase_mem_pool_max_size(pool);
+
+	return 0;
+}
+
+static int kbase_mem_pool_debugfs_max_size_set(void *data, u64 val)
+{
+	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+
+	kbase_mem_pool_set_max_size(pool, val);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops,
+		kbase_mem_pool_debugfs_max_size_get,
+		kbase_mem_pool_debugfs_max_size_set,
+		"%llu\n");
+
+void kbase_mem_pool_debugfs_init(struct dentry *parent,
+		struct kbase_mem_pool *pool,
+		struct kbase_mem_pool *lp_pool)
+{
+	debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_size_fops);
+
+	debugfs_create_file("mem_pool_max_size", S_IRUGO | S_IWUSR, parent,
+			pool, &kbase_mem_pool_debugfs_max_size_fops);
+
+	debugfs_create_file("lp_mem_pool_size", S_IRUGO | S_IWUSR, parent,
+			lp_pool, &kbase_mem_pool_debugfs_size_fops);
+
+	debugfs_create_file("lp_mem_pool_max_size", S_IRUGO | S_IWUSR, parent,
+			lp_pool, &kbase_mem_pool_debugfs_max_size_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
new file mode 100644
index 0000000000000..990d91c8fbe7e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_MEM_POOL_DEBUGFS_H
+#define _KBASE_MEM_POOL_DEBUGFS_H
+
+#include <mali_kbase.h>
+
+/**
+ * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool
+ * @parent:  Parent debugfs dentry
+ * @pool:    Memory pool of small pages to control
+ * @lp_pool: Memory pool of large pages to control
+ *
+ * Adds four debugfs files under @parent:
+ * - mem_pool_size: get/set the current size of @pool
+ * - mem_pool_max_size: get/set the max size of @pool
+ * - lp_mem_pool_size: get/set the current size of @lp_pool
+ * - lp_mem_pool_max_size: get/set the max size of @lp_pool
+ */
+void kbase_mem_pool_debugfs_init(struct dentry *parent,
+		struct kbase_mem_pool *pool,
+		struct kbase_mem_pool *lp_pool);
+
+#endif  /*_KBASE_MEM_POOL_DEBUGFS_H*/
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
new file mode 100644
index 0000000000000..d4f8433f40879
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
@@ -0,0 +1,126 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/** Show callback for the @c mem_profile debugfs file.
+ *
+ * This function is called to get the contents of the @c mem_profile debugfs
+ * file. This is a report of current memory usage and distribution in userspace.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise
+ */
+static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_context *kctx = sfile->private;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
+
+	seq_putc(sfile, '\n');
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return 0;
+}
+
+/*
+ *  File operations related to debugfs entry for mem_profile
+ */
+static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbasep_mem_profile_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_mem_profile_debugfs_fops = {
+	.open = kbasep_mem_profile_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+		kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
+		if (!debugfs_create_file("mem_profile", S_IRUGO,
+					kctx->kctx_dentry, kctx,
+					&kbasep_mem_profile_debugfs_fops)) {
+			err = -EAGAIN;
+		} else {
+			kbase_ctx_flag_set(kctx,
+					   KCTX_MEM_PROFILE_INITIALIZED);
+		}
+	}
+
+	if (kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
+		kfree(kctx->mem_profile_data);
+		kctx->mem_profile_data = data;
+		kctx->mem_profile_size = size;
+	} else {
+		kfree(data);
+	}
+
+	dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d",
+		err, kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	mutex_unlock(&kctx->mem_profile_lock);
+
+	return err;
+}
+
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx)
+{
+	mutex_lock(&kctx->mem_profile_lock);
+
+	dev_dbg(kctx->kbdev->dev, "initialised: %d",
+				kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+
+	kfree(kctx->mem_profile_data);
+	kctx->mem_profile_data = NULL;
+	kctx->mem_profile_size = 0;
+
+	mutex_unlock(&kctx->mem_profile_lock);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size)
+{
+	kfree(data);
+	return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
new file mode 100644
index 0000000000000..1462247c3bcae
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs.h
+ * Header file for mem profiles entries in debugfs
+ *
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H
+#define _KBASE_MEM_PROFILE_DEBUGFS_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Remove entry from Mali memory profile debugfs
+ */
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
+
+/**
+ * @brief Insert @p data to the debugfs file so it can be read by userspace
+ *
+ * The function takes ownership of @p data and frees it later when new data
+ * is inserted.
+ *
+ * If the debugfs entry corresponding to the @p kctx doesn't exist,
+ * an attempt will be made to create it.
+ *
+ * @param kctx The context whose debugfs file @p data should be inserted to
+ * @param data A NULL-terminated string to be inserted to the debugfs file,
+ *             without the trailing new line character
+ * @param size The length of the @p data string
+ * @return 0 if @p data inserted correctly
+ *         -EAGAIN in case of error
+ * @post @ref mem_profile_initialized will be set to @c true
+ *       the first time this function succeeds.
+ */
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+					size_t size);
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_H*/
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
new file mode 100644
index 0000000000000..43b0f6c032110
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file mali_kbase_mem_profile_debugfs_buf_size.h
+ * Header file for the size of the buffer to accumulate the histogram report text in
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+
+/**
+ * The size of the buffer to accumulate the histogram report text in
+ * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
+ */
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE \
+	((size_t) (64 + ((80 + (56 * 64)) * 34) + 56))
+
+#endif  /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c
new file mode 100644
index 0000000000000..3ba861d463df6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c
@@ -0,0 +1,2664 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mmu.c
+ * Base kernel MMU management.
+ */
+
+/* #define DEBUG    1 */
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_instr_defs.h>
+#include <mali_kbase_debug.h>
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_time.h>
+#include <mali_kbase_mem.h>
+
+#define KBASE_MMU_PAGE_ENTRIES 512
+
+/**
+ * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs.
+ *
+ * If sync is not set then transactions still in flight when the flush is issued
+ * may use the old page tables and the data they write will not be written out
+ * to memory, this function returns after the flush has been issued but
+ * before all accesses which might effect the flushed region have completed.
+ *
+ * If sync is set then accesses in the flushed region will be drained
+ * before data is flush and invalidated through L1, L2 and into memory,
+ * after which point this function will return.
+ */
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync);
+
+/**
+ * kbase_mmu_flush_invalidate_no_ctx() - Flush and invalidate the GPU caches.
+ * @kbdev: Device pointer.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ * @as_nr: GPU address space number for which flush + invalidate is required.
+ *
+ * This is used for MMU tables which do not belong to a user space context.
+ */
+static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev,
+		u64 vpfn, size_t nr, bool sync, int as_nr);
+
+/**
+ * kbase_mmu_sync_pgd - sync page directory to memory
+ * @kbdev:	Device pointer.
+ * @handle:	Address of DMA region.
+ * @size:       Size of the region to sync.
+ *
+ * This should be called after each page directory update.
+ */
+
+static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
+		dma_addr_t handle, size_t size)
+{
+	/* If page table is not coherent then ensure the gpu can read
+	 * the pages from memory
+	 */
+	if (kbdev->system_coherency != COHERENCY_ACE)
+		dma_sync_single_for_device(kbdev->dev, handle, size,
+				DMA_TO_DEVICE);
+}
+
+/*
+ * Definitions:
+ * - PGD: Page Directory.
+ * - PTE: Page Table Entry. A 64bit value pointing to the next
+ *        level of translation
+ * - ATE: Address Transation Entry. A 64bit value pointing to
+ *        a 4kB physical page.
+ */
+
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str);
+
+static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+					struct tagged_addr *phys, size_t nr,
+					unsigned long flags);
+
+/**
+ * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to
+ *                               a region on a GPU page fault
+ *
+ * @reg:           The region that will be backed with more pages
+ * @fault_rel_pfn: PFN of the fault relative to the start of the region
+ *
+ * This calculates how much to increase the backing of a region by, based on
+ * where a GPU page fault occurred and the flags in the region.
+ *
+ * This can be more than the minimum number of pages that would reach
+ * @fault_rel_pfn, for example to reduce the overall rate of page fault
+ * interrupts on a region, or to ensure that the end address is aligned.
+ *
+ * Return: the number of backed pages to increase by
+ */
+static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev,
+		struct kbase_va_region *reg, size_t fault_rel_pfn)
+{
+	size_t multiple = reg->extent;
+	size_t reg_current_size = kbase_reg_current_backed_size(reg);
+	size_t minimum_extra = fault_rel_pfn - reg_current_size + 1;
+	size_t remainder;
+
+	if (!multiple) {
+		dev_warn(kbdev->dev,
+				"VA Region 0x%llx extent was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n",
+				((unsigned long long)reg->start_pfn) << PAGE_SHIFT);
+		return minimum_extra;
+	}
+
+	/* Calculate the remainder to subtract from minimum_extra to make it
+	 * the desired (rounded down) multiple of the extent.
+	 * Depending on reg's flags, the base used for calculating multiples is
+	 * different */
+	if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
+		/* multiple is based from the top of the initial commit, which
+		 * has been allocated in such a way that (start_pfn +
+		 * initial_commit) is already aligned to multiple. Hence the
+		 * pfn for the end of committed memory will also be aligned to
+		 * multiple */
+		size_t initial_commit = reg->initial_commit;
+
+		if (fault_rel_pfn < initial_commit) {
+			/* this case is just to catch in case it's been
+			 * recommitted by userspace to be smaller than the
+			 * initial commit */
+			minimum_extra = initial_commit - reg_current_size;
+			remainder = 0;
+		} else {
+			/* same as calculating (fault_rel_pfn - initial_commit + 1) */
+			size_t pages_after_initial = minimum_extra + reg_current_size - initial_commit;
+
+			remainder = pages_after_initial % multiple;
+		}
+	} else {
+		/* multiple is based from the current backed size, even if the
+		 * current backed size/pfn for end of committed memory are not
+		 * themselves aligned to multiple */
+		remainder = minimum_extra % multiple;
+	}
+
+	if (remainder == 0)
+		return minimum_extra;
+
+	return minimum_extra + multiple - remainder;
+}
+
+#ifdef CONFIG_MALI_JOB_DUMP
+static void kbase_gpu_mmu_handle_write_faulting_as(
+				struct kbase_device *kbdev,
+				struct kbase_as *faulting_as,
+				u64 start_pfn, size_t nr, u32 op)
+{
+	mutex_lock(&kbdev->mmu_hw_mutex);
+
+	kbase_mmu_hw_clear_fault(kbdev, faulting_as,
+			KBASE_MMU_FAULT_TYPE_PAGE);
+	kbase_mmu_hw_do_operation(kbdev, faulting_as, start_pfn,
+			nr, op, 1);
+
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_mmu_hw_enable_fault(kbdev, faulting_as,
+			KBASE_MMU_FAULT_TYPE_PAGE);
+}
+
+static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
+			struct kbase_as *faulting_as)
+{
+	struct kbasep_gwt_list_element *pos;
+	struct kbase_va_region *region;
+	struct kbase_device *kbdev;
+	u64 fault_pfn, pfn_offset;
+	u32 op;
+	int ret;
+	int as_no;
+
+	as_no = faulting_as->number;
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+	fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* Find region and check if it should be writable. */
+	region = kbase_region_tracker_find_region_enclosing_address(kctx,
+			faulting_as->fault_addr);
+	if (!region || region->flags & KBASE_REG_FREE) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not mapped on the GPU");
+		return;
+	}
+
+	if (!(region->flags & KBASE_REG_GPU_WR)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Region does not have write permissions");
+		return;
+	}
+
+	/* Capture addresses of faulting write location
+	 * for job dumping if write tracking is enabled.
+	 */
+	if (kctx->gwt_enabled) {
+		u64 page_addr = faulting_as->fault_addr & PAGE_MASK;
+		bool found = false;
+		/* Check if this write was already handled. */
+		list_for_each_entry(pos, &kctx->gwt_current_list, link) {
+			if (page_addr == pos->page_addr) {
+				found = true;
+				break;
+			}
+		}
+
+		if (!found) {
+			pos = kmalloc(sizeof(*pos), GFP_KERNEL);
+			if (pos) {
+				pos->region = region;
+				pos->page_addr = page_addr;
+				pos->num_pages = 1;
+				list_add(&pos->link, &kctx->gwt_current_list);
+			} else {
+				dev_warn(kbdev->dev, "kmalloc failure");
+			}
+		}
+	}
+
+	pfn_offset = fault_pfn - region->start_pfn;
+	/* Now make this faulting page writable to GPU. */
+	ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn,
+				&kbase_get_gpu_phy_pages(region)[pfn_offset],
+				1, region->flags);
+
+	/* flush L2 and unlock the VA (resumes the MMU) */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+		op = AS_COMMAND_FLUSH;
+	else
+		op = AS_COMMAND_FLUSH_PT;
+
+	kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as,
+			fault_pfn, 1, op);
+
+	kbase_gpu_vm_unlock(kctx);
+}
+
+static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx,
+			struct kbase_as	*faulting_as)
+{
+	u32 fault_status;
+
+	fault_status = faulting_as->fault_status;
+
+	switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
+	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
+		kbase_gpu_mmu_handle_write_fault(kctx, faulting_as);
+		break;
+	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Execute Permission fault");
+		break;
+	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Read Permission fault");
+		break;
+	default:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Unknown Permission fault");
+		break;
+	}
+}
+#endif
+
+#define MAX_POOL_LEVEL 2
+
+/**
+ * page_fault_try_alloc - Try to allocate memory from a context pool
+ * @kctx:          Context pointer
+ * @region:        Region to grow
+ * @new_pages:     Number of 4 kB pages to allocate
+ * @pages_to_grow: Pointer to variable to store number of outstanding pages on
+ *                 failure. This can be either 4 kB or 2 MB pages, depending on
+ *                 the number of pages requested.
+ * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true
+ *                 for 2 MB, false for 4 kB.
+ * @prealloc_sas:  Pointer to kbase_sub_alloc structures
+ *
+ * This function will try to allocate as many pages as possible from the context
+ * pool, then if required will try to allocate the remaining pages from the
+ * device pool.
+ *
+ * This function will not allocate any new memory beyond that that is already
+ * present in the context or device pools. This is because it is intended to be
+ * called with the vm_lock held, which could cause recursive locking if the
+ * allocation caused the out-of-memory killer to run.
+ *
+ * If 2 MB pages are enabled and new_pages is >= 2 MB then pages_to_grow will be
+ * a count of 2 MB pages, otherwise it will be a count of 4 kB pages.
+ *
+ * Return: true if successful, false on failure
+ */
+static bool page_fault_try_alloc(struct kbase_context *kctx,
+		struct kbase_va_region *region, size_t new_pages,
+		int *pages_to_grow, bool *grow_2mb_pool,
+		struct kbase_sub_alloc **prealloc_sas)
+{
+	struct tagged_addr *gpu_pages[MAX_POOL_LEVEL] = {NULL};
+	struct tagged_addr *cpu_pages[MAX_POOL_LEVEL] = {NULL};
+	size_t pages_alloced[MAX_POOL_LEVEL] = {0};
+	struct kbase_mem_pool *pool, *root_pool;
+	int pool_level = 0;
+	bool alloc_failed = false;
+	size_t pages_still_required;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	if (new_pages >= (SZ_2M / SZ_4K)) {
+		root_pool = &kctx->lp_mem_pool;
+		*grow_2mb_pool = true;
+	} else {
+#endif
+		root_pool = &kctx->mem_pool;
+		*grow_2mb_pool = false;
+#ifdef CONFIG_MALI_2MB_ALLOC
+	}
+#endif
+
+	if (region->gpu_alloc != region->cpu_alloc)
+		new_pages *= 2;
+
+	pages_still_required = new_pages;
+
+	/* Determine how many pages are in the pools before trying to allocate.
+	 * Don't attempt to allocate & free if the allocation can't succeed.
+	 */
+	for (pool = root_pool; pool != NULL; pool = pool->next_pool) {
+		size_t pool_size_4k;
+
+		kbase_mem_pool_lock(pool);
+
+		pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
+		if (pool_size_4k >= pages_still_required)
+			pages_still_required = 0;
+		else
+			pages_still_required -= pool_size_4k;
+
+		kbase_mem_pool_unlock(pool);
+
+		if (!pages_still_required)
+			break;
+	}
+
+	if (pages_still_required) {
+		/* Insufficient pages in pools. Don't try to allocate - just
+		 * request a grow.
+		 */
+		*pages_to_grow = pages_still_required;
+
+		return false;
+	}
+
+	/* Since we've dropped the pool locks, the amount of memory in the pools
+	 * may change between the above check and the actual allocation.
+	 */
+	pool = root_pool;
+	for (pool_level = 0; pool_level < MAX_POOL_LEVEL; pool_level++) {
+		size_t pool_size_4k;
+		size_t pages_to_alloc_4k;
+		size_t pages_to_alloc_4k_per_alloc;
+
+		kbase_mem_pool_lock(pool);
+
+		/* Allocate as much as possible from this pool*/
+		pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
+		pages_to_alloc_4k = MIN(new_pages, pool_size_4k);
+		if (region->gpu_alloc == region->cpu_alloc)
+			pages_to_alloc_4k_per_alloc = pages_to_alloc_4k;
+		else
+			pages_to_alloc_4k_per_alloc = pages_to_alloc_4k >> 1;
+
+		pages_alloced[pool_level] = pages_to_alloc_4k;
+		if (pages_to_alloc_4k) {
+			gpu_pages[pool_level] =
+					kbase_alloc_phy_pages_helper_locked(
+						region->gpu_alloc, pool,
+						pages_to_alloc_4k_per_alloc,
+						&prealloc_sas[0]);
+
+			if (!gpu_pages[pool_level]) {
+				alloc_failed = true;
+			} else if (region->gpu_alloc != region->cpu_alloc) {
+				cpu_pages[pool_level] =
+					kbase_alloc_phy_pages_helper_locked(
+						region->cpu_alloc, pool,
+						pages_to_alloc_4k_per_alloc,
+						&prealloc_sas[1]);
+
+				if (!cpu_pages[pool_level])
+					alloc_failed = true;
+			}
+		}
+
+		kbase_mem_pool_unlock(pool);
+
+		if (alloc_failed) {
+			WARN_ON(!new_pages);
+			WARN_ON(pages_to_alloc_4k >= new_pages);
+			WARN_ON(pages_to_alloc_4k_per_alloc >= new_pages);
+			break;
+		}
+
+		new_pages -= pages_to_alloc_4k;
+
+		if (!new_pages)
+			break;
+
+		pool = pool->next_pool;
+		if (!pool)
+			break;
+	}
+
+	if (new_pages) {
+		/* Allocation was unsuccessful */
+		int max_pool_level = pool_level;
+
+		pool = root_pool;
+
+		/* Free memory allocated so far */
+		for (pool_level = 0; pool_level <= max_pool_level;
+				pool_level++) {
+			kbase_mem_pool_lock(pool);
+
+			if (region->gpu_alloc != region->cpu_alloc) {
+				if (pages_alloced[pool_level] &&
+						cpu_pages[pool_level])
+					kbase_free_phy_pages_helper_locked(
+						region->cpu_alloc,
+						pool, cpu_pages[pool_level],
+						pages_alloced[pool_level]);
+			}
+
+			if (pages_alloced[pool_level] && gpu_pages[pool_level])
+				kbase_free_phy_pages_helper_locked(
+						region->gpu_alloc,
+						pool, gpu_pages[pool_level],
+						pages_alloced[pool_level]);
+
+			kbase_mem_pool_unlock(pool);
+
+			pool = pool->next_pool;
+		}
+
+		/*
+		 * If the allocation failed despite there being enough memory in
+		 * the pool, then just fail. Otherwise, try to grow the memory
+		 * pool.
+		 */
+		if (alloc_failed)
+			*pages_to_grow = 0;
+		else
+			*pages_to_grow = new_pages;
+
+		return false;
+	}
+
+	/* Allocation was successful. No pages to grow, return success. */
+	*pages_to_grow = 0;
+
+	return true;
+}
+
+void page_fault_worker(struct work_struct *data)
+{
+	u64 fault_pfn;
+	u32 fault_status;
+	size_t new_pages;
+	size_t fault_rel_pfn;
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	struct kbase_va_region *region;
+	int err;
+	bool grown = false;
+	int pages_to_grow;
+	bool grow_2mb_pool;
+	struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
+	int i;
+
+	faulting_as = container_of(data, struct kbase_as, work_pagefault);
+	fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
+
+	if (unlikely(faulting_as->protected_mode)) {
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Protected mode fault");
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+
+		goto fault_done;
+	}
+
+	fault_status = faulting_as->fault_status;
+	switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) {
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT:
+		/* need to check against the region to handle this one */
+		break;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT:
+#ifdef CONFIG_MALI_JOB_DUMP
+		/* If GWT was ever enabled then we need to handle
+		 * write fault pages even if the feature was disabled later.
+		 */
+		if (kctx->gwt_was_enabled) {
+			kbase_gpu_mmu_handle_permission_fault(kctx,
+							faulting_as);
+			goto fault_done;
+		}
+#endif
+
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Translation table bus fault");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG:
+		/* nothing to do, but we don't expect this fault currently */
+		dev_warn(kbdev->dev, "Access flag unexpectedly set");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Address size fault");
+		else
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Unknown fault code");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Memory attributes fault");
+		else
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Unknown fault code");
+		goto fault_done;
+
+	default:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Unknown fault code");
+		goto fault_done;
+	}
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+	/* Preallocate memory for the sub-allocation structs if necessary */
+	for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
+		prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
+		if (!prealloc_sas[i]) {
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Failed pre-allocating memory for sub-allocations' metadata");
+			goto fault_done;
+		}
+	}
+#endif /* CONFIG_MALI_2MB_ALLOC */
+
+page_fault_retry:
+	/* so we have a translation fault, let's see if it is for growable
+	 * memory */
+	kbase_gpu_vm_lock(kctx);
+
+	region = kbase_region_tracker_find_region_enclosing_address(kctx,
+			faulting_as->fault_addr);
+	if (!region || region->flags & KBASE_REG_FREE) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not mapped on the GPU");
+		goto fault_done;
+	}
+
+	if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"DMA-BUF is not mapped on the GPU");
+		goto fault_done;
+	}
+
+	if ((region->flags & GROWABLE_FLAGS_REQUIRED)
+			!= GROWABLE_FLAGS_REQUIRED) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Memory is not growable");
+		goto fault_done;
+	}
+
+	if ((region->flags & KBASE_REG_DONT_NEED)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Don't need memory can't be grown");
+		goto fault_done;
+	}
+
+	/* find the size we need to grow it by */
+	/* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address
+	 * validating the fault_adress to be within a size_t from the start_pfn */
+	fault_rel_pfn = fault_pfn - region->start_pfn;
+
+	if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
+		dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
+				faulting_as->fault_addr, region->start_pfn,
+				region->start_pfn +
+				kbase_reg_current_backed_size(region));
+
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* [1] in case another page fault occurred while we were
+		 * handling the (duplicate) page fault we need to ensure we
+		 * don't loose the other page fault as result of us clearing
+		 * the MMU IRQ. Therefore, after we clear the MMU IRQ we send
+		 * an UNLOCK command that will retry any stalled memory
+		 * transaction (which should cause the other page fault to be
+		 * raised again).
+		 */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+
+		goto fault_done;
+	}
+
+	new_pages = reg_grow_calc_extra_pages(kbdev, region, fault_rel_pfn);
+
+	/* cap to max vsize */
+	new_pages = min(new_pages, region->nr_pages - kbase_reg_current_backed_size(region));
+
+	if (0 == new_pages) {
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* Duplicate of a fault we've already handled, nothing to do */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		/* See comment [1] about UNLOCK usage */
+		kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0,
+				AS_COMMAND_UNLOCK, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as,
+				KBASE_MMU_FAULT_TYPE_PAGE);
+		kbase_gpu_vm_unlock(kctx);
+		goto fault_done;
+	}
+
+	pages_to_grow = 0;
+
+	spin_lock(&kctx->mem_partials_lock);
+	grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow,
+			&grow_2mb_pool, prealloc_sas);
+	spin_unlock(&kctx->mem_partials_lock);
+
+	if (grown) {
+		u64 pfn_offset;
+		u32 op;
+
+		/* alloc success */
+		KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
+
+		/* set up the new pages */
+		pfn_offset = kbase_reg_current_backed_size(region) - new_pages;
+		/*
+		 * Note:
+		 * Issuing an MMU operation will unlock the MMU and cause the
+		 * translation to be replayed. If the page insertion fails then
+		 * rather then trying to continue the context should be killed
+		 * so the no_flush version of insert_pages is used which allows
+		 * us to unlock the MMU as we see fit.
+		 */
+		err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu,
+				region->start_pfn + pfn_offset,
+				&kbase_get_gpu_phy_pages(region)[pfn_offset],
+				new_pages, region->flags);
+		if (err) {
+			kbase_free_phy_pages_helper(region->gpu_alloc, new_pages);
+			if (region->gpu_alloc != region->cpu_alloc)
+				kbase_free_phy_pages_helper(region->cpu_alloc,
+						new_pages);
+			kbase_gpu_vm_unlock(kctx);
+			/* The locked VA region will be unlocked and the cache invalidated in here */
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Page table update failure");
+			goto fault_done;
+		}
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
+#endif
+		KBASE_TLSTREAM_AUX_PAGEFAULT(kctx->id, (u64)new_pages);
+
+		/* AS transaction begin */
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* flush L2 and unlock the VA (resumes the MMU) */
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+			op = AS_COMMAND_FLUSH;
+		else
+			op = AS_COMMAND_FLUSH_PT;
+
+		/* clear MMU interrupt - this needs to be done after updating
+		 * the page tables but before issuing a FLUSH command. The
+		 * FLUSH cmd has a side effect that it restarts stalled memory
+		 * transactions in other address spaces which may cause
+		 * another fault to occur. If we didn't clear the interrupt at
+		 * this stage a new IRQ might not be raised when the GPU finds
+		 * a MMU IRQ is already pending.
+		 */
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+
+		kbase_mmu_hw_do_operation(kbdev, faulting_as,
+					  faulting_as->fault_addr >> PAGE_SHIFT,
+					  new_pages,
+					  op, 1);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* AS transaction end */
+
+		/* reenable this in the mask */
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as,
+					 KBASE_MMU_FAULT_TYPE_PAGE);
+
+#ifdef CONFIG_MALI_JOB_DUMP
+		if (kctx->gwt_enabled) {
+			/* GWT also tracks growable regions. */
+			struct kbasep_gwt_list_element *pos;
+
+			pos = kmalloc(sizeof(*pos), GFP_KERNEL);
+			if (pos) {
+				pos->region = region;
+				pos->page_addr = (region->start_pfn +
+							pfn_offset) <<
+							 PAGE_SHIFT;
+				pos->num_pages = new_pages;
+				list_add(&pos->link,
+					&kctx->gwt_current_list);
+			} else {
+				dev_warn(kbdev->dev, "kmalloc failure");
+			}
+		}
+#endif
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		int ret = -ENOMEM;
+
+		kbase_gpu_vm_unlock(kctx);
+
+		/* If the memory pool was insufficient then grow it and retry.
+		 * Otherwise fail the allocation.
+		 */
+		if (pages_to_grow > 0) {
+#ifdef CONFIG_MALI_2MB_ALLOC
+			if (grow_2mb_pool) {
+				/* Round page requirement up to nearest 2 MB */
+				pages_to_grow = (pages_to_grow +
+					((1 << kctx->lp_mem_pool.order) - 1))
+						>> kctx->lp_mem_pool.order;
+				ret = kbase_mem_pool_grow(&kctx->lp_mem_pool,
+						pages_to_grow);
+			} else {
+#endif
+				ret = kbase_mem_pool_grow(&kctx->mem_pool,
+						pages_to_grow);
+#ifdef CONFIG_MALI_2MB_ALLOC
+			}
+#endif
+		}
+		if (ret < 0) {
+			/* failed to extend, handle as a normal PF */
+			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Page allocation failure");
+		} else {
+			goto page_fault_retry;
+		}
+	}
+
+fault_done:
+	for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
+		kfree(prealloc_sas[i]);
+
+	/*
+	 * By this point, the fault was handled in some way,
+	 * so release the ctx refcount
+	 */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut)
+{
+	u64 *page;
+	int i;
+	struct page *p;
+
+	p = kbase_mem_pool_alloc(&kbdev->mem_pool);
+	if (!p)
+		return 0;
+
+	page = kmap(p);
+	if (NULL == page)
+		goto alloc_free;
+
+	/* If the MMU tables belong to a context then account the memory usage
+	 * to that context, otherwise the MMU tables are device wide and are
+	 * only accounted to the device.
+	 */
+	if (mmut->kctx) {
+		int new_page_count;
+
+		new_page_count = kbase_atomic_add_pages(1,
+				&mmut->kctx->used_pages);
+		KBASE_TLSTREAM_AUX_PAGESALLOC(
+				mmut->kctx->id,
+				(u64)new_page_count);
+		kbase_process_page_usage_inc(mmut->kctx, 1);
+	}
+
+	kbase_atomic_add_pages(1, &kbdev->memdev.used_pages);
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
+		kbdev->mmu_mode->entry_invalidate(&page[i]);
+
+	kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE);
+
+	kunmap(p);
+	return page_to_phys(p);
+
+alloc_free:
+	kbase_mem_pool_free(&kbdev->mem_pool, p, false);
+
+	return 0;
+}
+
+/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
+ * new table from the pool if needed and possible
+ */
+static int mmu_get_next_pgd(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut,
+		phys_addr_t *pgd, u64 vpfn, int level)
+{
+	u64 *page;
+	phys_addr_t target_pgd;
+	struct page *p;
+
+	KBASE_DEBUG_ASSERT(*pgd);
+
+	lockdep_assert_held(&mmut->mmu_lock);
+
+	/*
+	 * Architecture spec defines level-0 as being the top-most.
+	 * This is a bit unfortunate here, but we keep the same convention.
+	 */
+	vpfn >>= (3 - level) * 9;
+	vpfn &= 0x1FF;
+
+	p = pfn_to_page(PFN_DOWN(*pgd));
+	page = kmap(p);
+	if (NULL == page) {
+		dev_warn(kbdev->dev, "%s: kmap failure\n", __func__);
+		return -EINVAL;
+	}
+
+	target_pgd = kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+
+	if (!target_pgd) {
+		target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
+		if (!target_pgd) {
+			dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n",
+					__func__);
+			kunmap(p);
+			return -ENOMEM;
+		}
+
+		kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
+
+		kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE);
+		/* Rely on the caller to update the address space flags. */
+	}
+
+	kunmap(p);
+	*pgd = target_pgd;
+
+	return 0;
+}
+
+/*
+ * Returns the PGD for the specified level of translation
+ */
+static int mmu_get_pgd_at_level(struct kbase_device *kbdev,
+					struct kbase_mmu_table *mmut,
+					u64 vpfn,
+					unsigned int level,
+					phys_addr_t *out_pgd)
+{
+	phys_addr_t pgd;
+	int l;
+
+	lockdep_assert_held(&mmut->mmu_lock);
+	pgd = mmut->pgd;
+
+	for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) {
+		int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
+		/* Handle failure condition */
+		if (err) {
+			dev_dbg(kbdev->dev,
+				 "%s: mmu_get_next_pgd failure at level %d\n",
+				 __func__, l);
+			return err;
+		}
+	}
+
+	*out_pgd = pgd;
+
+	return 0;
+}
+
+static int mmu_get_bottom_pgd(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut,
+		u64 vpfn,
+		phys_addr_t *out_pgd)
+{
+	return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL,
+			out_pgd);
+}
+
+static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut,
+		u64 from_vpfn, u64 to_vpfn)
+{
+	phys_addr_t pgd;
+	u64 vpfn = from_vpfn;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+	KBASE_DEBUG_ASSERT(from_vpfn <= to_vpfn);
+
+	lockdep_assert_held(&mmut->mmu_lock);
+
+	mmu_mode = kbdev->mmu_mode;
+
+	while (vpfn < to_vpfn) {
+		unsigned int i;
+		unsigned int idx = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx;
+		unsigned int pcount = 0;
+		unsigned int left = to_vpfn - vpfn;
+		unsigned int level;
+		u64 *page;
+
+		if (count > left)
+			count = left;
+
+		/* need to check if this is a 2MB page or a 4kB */
+		pgd = mmut->pgd;
+
+		for (level = MIDGARD_MMU_TOPLEVEL;
+				level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
+			idx = (vpfn >> ((3 - level) * 9)) & 0x1FF;
+			page = kmap(phys_to_page(pgd));
+			if (mmu_mode->ate_is_valid(page[idx], level))
+				break; /* keep the mapping */
+			kunmap(phys_to_page(pgd));
+			pgd = mmu_mode->pte_to_phy_addr(page[idx]);
+		}
+
+		switch (level) {
+		case MIDGARD_MMU_LEVEL(2):
+			/* remap to single entry to update */
+			pcount = 1;
+			break;
+		case MIDGARD_MMU_BOTTOMLEVEL:
+			/* page count is the same as the logical count */
+			pcount = count;
+			break;
+		default:
+			dev_warn(kbdev->dev, "%sNo support for ATEs at level %d\n",
+			       __func__, level);
+			goto next;
+		}
+
+		/* Invalidate the entries we added */
+		for (i = 0; i < pcount; i++)
+			mmu_mode->entry_invalidate(&page[idx + i]);
+
+		kbase_mmu_sync_pgd(kbdev,
+				   kbase_dma_addr(phys_to_page(pgd)) + 8 * idx,
+				   8 * pcount);
+		kunmap(phys_to_page(pgd));
+
+next:
+		vpfn += count;
+	}
+}
+
+/*
+ * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+					struct tagged_addr phys, size_t nr,
+					unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	/* In case the insert_single_page only partially completes we need to be
+	 * able to recover */
+	bool recover_required = false;
+	u64 recover_vpfn = vpfn;
+	size_t recover_count = 0;
+	size_t remain = nr;
+	int err;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu.mmu_lock);
+
+	while (remain) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > remain)
+			count = remain;
+
+		/*
+		 * Repeatedly calling mmu_get_bottom_pte() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		do {
+			err = mmu_get_bottom_pgd(kctx->kbdev, &kctx->mmu,
+					vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu.mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->kbdev->mem_pool,
+					MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu.mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx->kbdev,
+						&kctx->mmu,
+						recover_vpfn,
+						recover_vpfn + recover_count);
+			}
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+			if (recover_required) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kctx->kbdev,
+						&kctx->mmu,
+						recover_vpfn,
+						recover_vpfn + recover_count);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++) {
+			unsigned int ofs = index + i;
+
+			/* Fail if the current page is a valid ATE entry */
+			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+
+			mmu_mode->entry_set_ate(&pgd_page[ofs],
+						phys, flags,
+						MIDGARD_MMU_BOTTOMLEVEL);
+		}
+
+		vpfn += count;
+		remain -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+		/* We have started modifying the page table.
+		 * If further pages need inserting and fail we need to undo what
+		 * has already taken place */
+		recover_required = true;
+		recover_count += count;
+	}
+	mutex_unlock(&kctx->mmu.mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu.mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return err;
+}
+
+static inline void cleanup_empty_pte(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut, u64 *pte)
+{
+	phys_addr_t tmp_pgd;
+	struct page *tmp_p;
+
+	tmp_pgd = kbdev->mmu_mode->pte_to_phy_addr(*pte);
+	tmp_p = phys_to_page(tmp_pgd);
+	kbase_mem_pool_free(&kbdev->mem_pool, tmp_p, false);
+
+	/* If the MMU tables belong to a context then we accounted the memory
+	 * usage to that context, so decrement here.
+	 */
+	if (mmut->kctx) {
+		kbase_process_page_usage_dec(mmut->kctx, 1);
+		kbase_atomic_sub_pages(1, &mmut->kctx->used_pages);
+	}
+	kbase_atomic_sub_pages(1, &kbdev->memdev.used_pages);
+}
+
+int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
+				    struct kbase_mmu_table *mmut,
+				    const u64 start_vpfn,
+				    struct tagged_addr *phys, size_t nr,
+				    unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	u64 insert_vpfn = start_vpfn;
+	size_t remain = nr;
+	int err;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	/* Note that 0 is a valid start_vpfn */
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
+
+	mmu_mode = kbdev->mmu_mode;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&mmut->mmu_lock);
+
+	while (remain) {
+		unsigned int i;
+		unsigned int vindex = insert_vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
+		struct page *p;
+		unsigned int cur_level;
+
+		if (count > remain)
+			count = remain;
+
+		if (!vindex && is_huge_head(*phys))
+			cur_level = MIDGARD_MMU_LEVEL(2);
+		else
+			cur_level = MIDGARD_MMU_BOTTOMLEVEL;
+
+		/*
+		 * Repeatedly calling mmu_get_pgd_at_level() is clearly
+		 * suboptimal. We don't have to re-parse the whole tree
+		 * each time (just cache the l0-l2 sequence).
+		 * On the other hand, it's only a gain when we map more than
+		 * 256 pages at once (on average). Do we really care?
+		 */
+		do {
+			err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn,
+						   cur_level, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&mmut->mmu_lock);
+			err = kbase_mem_pool_grow(&kbdev->mem_pool,
+					cur_level);
+			mutex_lock(&mmut->mmu_lock);
+		} while (!err);
+
+		if (err) {
+			dev_warn(kbdev->dev,
+				 "%s: mmu_get_bottom_pgd failure\n", __func__);
+			if (insert_vpfn != start_vpfn) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kbdev,
+						mmut, start_vpfn, insert_vpfn);
+			}
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kbdev->dev, "%s: kmap failure\n",
+				 __func__);
+			if (insert_vpfn != start_vpfn) {
+				/* Invalidate the pages we have partially
+				 * completed */
+				mmu_insert_pages_failure_recovery(kbdev,
+						mmut, start_vpfn, insert_vpfn);
+			}
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		if (cur_level == MIDGARD_MMU_LEVEL(2)) {
+			unsigned int level_index = (insert_vpfn >> 9) & 0x1FF;
+			u64 *target = &pgd_page[level_index];
+
+			if (mmu_mode->pte_is_valid(*target, cur_level))
+				cleanup_empty_pte(kbdev, mmut, target);
+			mmu_mode->entry_set_ate(target, *phys, flags,
+						cur_level);
+		} else {
+			for (i = 0; i < count; i++) {
+				unsigned int ofs = vindex + i;
+				u64 *target = &pgd_page[ofs];
+
+				/* Warn if the current page is a valid ATE
+				 * entry. The page table shouldn't have anything
+				 * in the place where we are trying to put a
+				 * new entry. Modification to page table entries
+				 * should be performed with
+				 * kbase_mmu_update_pages()
+				 */
+				WARN_ON((*target & 1UL) != 0);
+
+				kbdev->mmu_mode->entry_set_ate(target,
+						phys[i], flags, cur_level);
+			}
+		}
+
+		phys += count;
+		insert_vpfn += count;
+		remain -= count;
+
+		kbase_mmu_sync_pgd(kbdev,
+				kbase_dma_addr(p) + (vindex * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(p);
+	}
+
+	err = 0;
+
+fail_unlock:
+	mutex_unlock(&mmut->mmu_lock);
+	return err;
+}
+
+/*
+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space
+ * number 'as_nr'.
+ */
+int kbase_mmu_insert_pages(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut, u64 vpfn,
+		struct tagged_addr *phys, size_t nr,
+		unsigned long flags, int as_nr)
+{
+	int err;
+
+	err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn,
+			phys, nr, flags);
+
+	if (mmut->kctx)
+		kbase_mmu_flush_invalidate(mmut->kctx, vpfn, nr, false);
+	else
+		kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, nr, false, as_nr);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+
+/**
+ * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches
+ * without retaining the kbase context.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
+ * other locking.
+ */
+static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+	u32 op;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	if (sync)
+		op = AS_COMMAND_FLUSH_MEM;
+	else
+		op = AS_COMMAND_FLUSH_PT;
+
+	err = kbase_mmu_hw_do_operation(kbdev,
+				&kbdev->as[kctx->as_nr],
+				vpfn, nr, op, 0);
+#if KBASE_GPU_RESET_EN
+	if (err) {
+		/* Flush failed to complete, assume the
+		 * GPU has hung and perform a reset to
+		 * recover */
+		dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+
+#ifndef CONFIG_MALI_NO_MALI
+	/*
+	 * As this function could be called in interrupt context the sync
+	 * request can't block. Instead log the request and the next flush
+	 * request will pick it up.
+	 */
+	if ((!err) && sync &&
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367))
+		atomic_set(&kctx->drain_pending, 1);
+#endif /* !CONFIG_MALI_NO_MALI */
+}
+
+/* Perform a flush/invalidate on a particular address space
+ */
+static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev,
+		struct kbase_as *as,
+		u64 vpfn, size_t nr, bool sync, bool drain_pending)
+{
+	int err;
+	u32 op;
+
+	if (kbase_pm_context_active_handle_suspend(kbdev,
+				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		/* GPU is off so there's no need to perform flush/invalidate */
+		return;
+	}
+
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+
+	if (sync)
+		op = AS_COMMAND_FLUSH_MEM;
+	else
+		op = AS_COMMAND_FLUSH_PT;
+
+	err = kbase_mmu_hw_do_operation(kbdev,
+			as, vpfn, nr, op, 0);
+
+#if KBASE_GPU_RESET_EN
+	if (err) {
+		/* Flush failed to complete, assume the GPU has hung and
+		 * perform a reset to recover
+		 */
+		dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n");
+
+		if (kbase_prepare_to_reset_gpu(kbdev))
+			kbase_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+
+#ifndef CONFIG_MALI_NO_MALI
+	/*
+	 * The transaction lock must be dropped before here
+	 * as kbase_wait_write_flush could take it if
+	 * the GPU was powered down (static analysis doesn't
+	 * know this can't happen).
+	 */
+	drain_pending |= (!err) && sync &&
+		kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367);
+	if (drain_pending) {
+		/* Wait for GPU to flush write buffer */
+		kbase_wait_write_flush(kbdev);
+	}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+	kbase_pm_context_idle(kbdev);
+}
+
+static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev,
+		u64 vpfn, size_t nr, bool sync, int as_nr)
+{
+	/* Skip if there is nothing to do */
+	if (nr) {
+		kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], vpfn,
+					nr, sync, false);
+	}
+}
+
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev;
+	bool ctx_is_in_runpool;
+	bool drain_pending = false;
+
+#ifndef CONFIG_MALI_NO_MALI
+	if (atomic_xchg(&kctx->drain_pending, 0))
+		drain_pending = true;
+#endif /* !CONFIG_MALI_NO_MALI */
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	kbdev = kctx->kbdev;
+	mutex_lock(&kbdev->js_data.queue_mutex);
+	ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx);
+	mutex_unlock(&kbdev->js_data.queue_mutex);
+
+	if (ctx_is_in_runpool) {
+		KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+		kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr],
+				vpfn, nr, sync, drain_pending);
+
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	}
+}
+
+void kbase_mmu_update(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut,
+		int as_nr)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	KBASE_DEBUG_ASSERT(as_nr != KBASEP_AS_NR_INVALID);
+
+	kbdev->mmu_mode->update(kbdev, mmut, as_nr);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_update);
+
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+	kbdev->mmu_mode->disable_as(kbdev, as_nr);
+}
+
+void kbase_mmu_disable(struct kbase_context *kctx)
+{
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the hwaccess_lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+	/*
+	 * The address space is being disabled, drain all knowledge of it out
+	 * from the caches as pages and page tables might be freed after this.
+	 *
+	 * The job scheduler code will already be holding the locks and context
+	 * so just do the flush.
+	 */
+	kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true);
+
+	kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_disable);
+
+/*
+ * We actually only discard the ATE, and not the page table
+ * pages. There is a potential DoS here, as we'll leak memory by
+ * having PTEs that are potentially unused.  Will require physical
+ * page accounting, so MMU pages are part of the process allocation.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
+	struct kbase_mmu_table *mmut, u64 vpfn, size_t nr, int as_nr)
+{
+	phys_addr_t pgd;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err = -EFAULT;
+
+	if (0 == nr) {
+		/* early out if nothing to do */
+		return 0;
+	}
+
+	mutex_lock(&mmut->mmu_lock);
+
+	mmu_mode = kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		unsigned int pcount;
+		unsigned int level;
+		u64 *page;
+
+		if (count > nr)
+			count = nr;
+
+		/* need to check if this is a 2MB or a 4kB page */
+		pgd = mmut->pgd;
+
+		for (level = MIDGARD_MMU_TOPLEVEL;
+				level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
+			phys_addr_t next_pgd;
+
+			index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
+			page = kmap(phys_to_page(pgd));
+			if (mmu_mode->ate_is_valid(page[index], level))
+				break; /* keep the mapping */
+			else if (!mmu_mode->pte_is_valid(page[index], level)) {
+				/* nothing here, advance */
+				switch (level) {
+				case MIDGARD_MMU_LEVEL(0):
+					count = 134217728;
+					break;
+				case MIDGARD_MMU_LEVEL(1):
+					count = 262144;
+					break;
+				case MIDGARD_MMU_LEVEL(2):
+					count = 512;
+					break;
+				case MIDGARD_MMU_LEVEL(3):
+					count = 1;
+					break;
+				}
+				if (count > nr)
+					count = nr;
+				goto next;
+			}
+			next_pgd = mmu_mode->pte_to_phy_addr(page[index]);
+			kunmap(phys_to_page(pgd));
+			pgd = next_pgd;
+		}
+
+		switch (level) {
+		case MIDGARD_MMU_LEVEL(0):
+		case MIDGARD_MMU_LEVEL(1):
+			dev_warn(kbdev->dev,
+				 "%s: No support for ATEs at level %d\n",
+				 __func__, level);
+			kunmap(phys_to_page(pgd));
+			goto out;
+		case MIDGARD_MMU_LEVEL(2):
+			/* can only teardown if count >= 512 */
+			if (count >= 512) {
+				pcount = 1;
+			} else {
+				dev_warn(kbdev->dev,
+					 "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n",
+					 __func__, count);
+				pcount = 0;
+			}
+			break;
+		case MIDGARD_MMU_BOTTOMLEVEL:
+			/* page count is the same as the logical count */
+			pcount = count;
+			break;
+		default:
+			dev_err(kbdev->dev,
+				"%s: found non-mapped memory, early out\n",
+				__func__);
+			vpfn += count;
+			nr -= count;
+			continue;
+		}
+
+		/* Invalidate the entries we added */
+		for (i = 0; i < pcount; i++)
+			mmu_mode->entry_invalidate(&page[index + i]);
+
+		kbase_mmu_sync_pgd(kbdev,
+				   kbase_dma_addr(phys_to_page(pgd)) +
+				   8 * index, 8*pcount);
+
+next:
+		kunmap(phys_to_page(pgd));
+		vpfn += count;
+		nr -= count;
+	}
+	err = 0;
+out:
+	mutex_unlock(&mmut->mmu_lock);
+
+	if (mmut->kctx)
+		kbase_mmu_flush_invalidate(mmut->kctx, vpfn, requested_nr, true);
+	else
+		kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, requested_nr, true, as_nr);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
+
+/**
+ * kbase_mmu_update_pages_no_flush() - Update page table entries on the GPU
+ *
+ * This will update page table entries that already exist on the GPU based on
+ * the new flags that are passed. It is used as a response to the changes of
+ * the memory attributes
+ *
+ * The caller is responsible for validating the memory attributes
+ *
+ * @kctx:  Kbase context
+ * @vpfn:  Virtual PFN (Page Frame Number) of the first page to update
+ * @phys:  Tagged physical addresses of the physical pages to replace the
+ *         current mappings
+ * @nr:    Number of pages to update
+ * @flags: Flags
+ */
+static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+					struct tagged_addr *phys, size_t nr,
+					unsigned long flags)
+{
+	phys_addr_t pgd;
+	u64 *pgd_page;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu.mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		size_t count = KBASE_MMU_PAGE_ENTRIES - index;
+		struct page *p;
+
+		if (count > nr)
+			count = nr;
+
+		do {
+			err = mmu_get_bottom_pgd(kctx->kbdev, &kctx->mmu,
+					vpfn, &pgd);
+			if (err != -ENOMEM)
+				break;
+			/* Fill the memory pool with enough pages for
+			 * the page walk to succeed
+			 */
+			mutex_unlock(&kctx->mmu.mmu_lock);
+			err = kbase_mem_pool_grow(&kctx->kbdev->mem_pool,
+					MIDGARD_MMU_BOTTOMLEVEL);
+			mutex_lock(&kctx->mmu.mmu_lock);
+		} while (!err);
+		if (err) {
+			dev_warn(kctx->kbdev->dev,
+				 "mmu_get_bottom_pgd failure\n");
+			goto fail_unlock;
+		}
+
+		p = pfn_to_page(PFN_DOWN(pgd));
+		pgd_page = kmap(p);
+		if (!pgd_page) {
+			dev_warn(kctx->kbdev->dev, "kmap failure\n");
+			err = -ENOMEM;
+			goto fail_unlock;
+		}
+
+		for (i = 0; i < count; i++)
+			mmu_mode->entry_set_ate(&pgd_page[index + i], phys[i],
+						flags, MIDGARD_MMU_BOTTOMLEVEL);
+
+		phys += count;
+		vpfn += count;
+		nr -= count;
+
+		kbase_mmu_sync_pgd(kctx->kbdev,
+				kbase_dma_addr(p) + (index * sizeof(u64)),
+				count * sizeof(u64));
+
+		kunmap(pfn_to_page(PFN_DOWN(pgd)));
+	}
+
+	mutex_unlock(&kctx->mmu.mmu_lock);
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu.mmu_lock);
+	return err;
+}
+
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
+			   struct tagged_addr *phys, size_t nr,
+			   unsigned long flags)
+{
+	int err;
+
+	err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, true);
+	return err;
+}
+
+static void mmu_teardown_level(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut, phys_addr_t pgd,
+		int level, u64 *pgd_page_buffer)
+{
+	phys_addr_t target_pgd;
+	struct page *p;
+	u64 *pgd_page;
+	int i;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	lockdep_assert_held(&mmut->mmu_lock);
+
+	pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+	/* kmap_atomic should NEVER fail. */
+	KBASE_DEBUG_ASSERT(NULL != pgd_page);
+	/* Copy the page to our preallocated buffer so that we can minimize
+	 * kmap_atomic usage */
+	memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
+	kunmap_atomic(pgd_page);
+	pgd_page = pgd_page_buffer;
+
+	mmu_mode = kbdev->mmu_mode;
+
+	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+		target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
+
+		if (target_pgd) {
+			if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
+				mmu_teardown_level(kbdev, mmut,
+						   target_pgd,
+						   level + 1,
+						   pgd_page_buffer +
+						   (PAGE_SIZE / sizeof(u64)));
+			}
+		}
+	}
+
+	p = pfn_to_page(PFN_DOWN(pgd));
+	kbase_mem_pool_free(&kbdev->mem_pool, p, true);
+	kbase_atomic_sub_pages(1, &kbdev->memdev.used_pages);
+
+	/* If MMU tables belong to a context then pages will have been accounted
+	 * against it, so we must decrement the usage counts here.
+	 */
+	if (mmut->kctx) {
+		kbase_process_page_usage_dec(mmut->kctx, 1);
+		kbase_atomic_sub_pages(1, &mmut->kctx->used_pages);
+	}
+}
+
+int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+		struct kbase_context *kctx)
+{
+	mutex_init(&mmut->mmu_lock);
+	mmut->kctx = kctx;
+
+	/* Preallocate MMU depth of four pages for mmu_teardown_level to use */
+	mmut->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
+
+	if (mmut->mmu_teardown_pages == NULL)
+		return -ENOMEM;
+
+	mmut->pgd = 0;
+	/* We allocate pages into the kbdev memory pool, then
+	 * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to
+	 * avoid allocations from the kernel happening with the lock held.
+	 */
+	while (!mmut->pgd) {
+		int err;
+
+		err = kbase_mem_pool_grow(&kbdev->mem_pool,
+				MIDGARD_MMU_BOTTOMLEVEL);
+		if (err) {
+			kbase_mmu_term(kbdev, mmut);
+			return -ENOMEM;
+		}
+
+		mutex_lock(&mmut->mmu_lock);
+		mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
+		mutex_unlock(&mmut->mmu_lock);
+	}
+
+	return 0;
+}
+
+void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
+{
+	if (mmut->pgd) {
+		mutex_lock(&mmut->mmu_lock);
+		mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL,
+				mmut->mmu_teardown_pages);
+		mutex_unlock(&mmut->mmu_lock);
+
+		if (mmut->kctx)
+			KBASE_TLSTREAM_AUX_PAGESALLOC(mmut->kctx->id, 0);
+	}
+
+	kfree(mmut->mmu_teardown_pages);
+	mutex_destroy(&mmut->mmu_lock);
+}
+
+static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
+{
+	phys_addr_t target_pgd;
+	u64 *pgd_page;
+	int i;
+	size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
+	size_t dump_size;
+	struct kbase_mmu_mode const *mmu_mode;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	lockdep_assert_held(&kctx->mmu.mmu_lock);
+
+	mmu_mode = kctx->kbdev->mmu_mode;
+
+	pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
+	if (!pgd_page) {
+		dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n");
+		return 0;
+	}
+
+	if (*size_left >= size) {
+		/* A modified physical address that contains the page table level */
+		u64 m_pgd = pgd | level;
+
+		/* Put the modified physical address in the output buffer */
+		memcpy(*buffer, &m_pgd, sizeof(m_pgd));
+		*buffer += sizeof(m_pgd);
+
+		/* Followed by the page table itself */
+		memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
+		*buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
+
+		*size_left -= size;
+	}
+
+	if (level < MIDGARD_MMU_BOTTOMLEVEL) {
+		for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+			if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
+				target_pgd = mmu_mode->pte_to_phy_addr(
+						pgd_page[i]);
+
+				dump_size = kbasep_mmu_dump_level(kctx,
+						target_pgd, level + 1,
+						buffer, size_left);
+				if (!dump_size) {
+					kunmap(pfn_to_page(PFN_DOWN(pgd)));
+					return 0;
+				}
+				size += dump_size;
+			}
+		}
+	}
+
+	kunmap(pfn_to_page(PFN_DOWN(pgd)));
+
+	return size;
+}
+
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
+{
+	void *kaddr;
+	size_t size_left;
+
+	KBASE_DEBUG_ASSERT(kctx);
+
+	if (0 == nr_pages) {
+		/* can't dump in a 0 sized buffer, early out */
+		return NULL;
+	}
+
+	size_left = nr_pages * PAGE_SIZE;
+
+	KBASE_DEBUG_ASSERT(0 != size_left);
+	kaddr = vmalloc_user(size_left);
+
+	mutex_lock(&kctx->mmu.mmu_lock);
+
+	if (kaddr) {
+		u64 end_marker = 0xFFULL;
+		char *buffer;
+		char *mmu_dump_buffer;
+		u64 config[3];
+		size_t dump_size, size = 0;
+
+		buffer = (char *)kaddr;
+		mmu_dump_buffer = buffer;
+
+		if (kctx->api_version >= KBASE_API_VERSION(8, 4)) {
+			struct kbase_mmu_setup as_setup;
+
+			kctx->kbdev->mmu_mode->get_as_setup(&kctx->mmu,
+					&as_setup);
+			config[0] = as_setup.transtab;
+			config[1] = as_setup.memattr;
+			config[2] = as_setup.transcfg;
+			memcpy(buffer, &config, sizeof(config));
+			mmu_dump_buffer += sizeof(config);
+			size_left -= sizeof(config);
+			size += sizeof(config);
+		}
+
+		dump_size = kbasep_mmu_dump_level(kctx,
+				kctx->mmu.pgd,
+				MIDGARD_MMU_TOPLEVEL,
+				&mmu_dump_buffer,
+				&size_left);
+
+		if (!dump_size)
+			goto fail_free;
+
+		size += dump_size;
+
+		/* Add on the size for the end marker */
+		size += sizeof(u64);
+
+		if (size > (nr_pages * PAGE_SIZE)) {
+			/* The buffer isn't big enough - free the memory and return failure */
+			goto fail_free;
+		}
+
+		/* Add the end marker */
+		memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
+	}
+
+	mutex_unlock(&kctx->mmu.mmu_lock);
+	return kaddr;
+
+fail_free:
+	vfree(kaddr);
+	mutex_unlock(&kctx->mmu.mmu_lock);
+	return NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_dump);
+
+void bus_fault_worker(struct work_struct *data)
+{
+	struct kbase_as *faulting_as;
+	int as_no;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif /* KBASE_GPU_RESET_EN */
+
+	faulting_as = container_of(data, struct kbase_as, work_busfault);
+
+	as_no = faulting_as->number;
+
+	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
+	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	 */
+	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+	if (WARN_ON(!kctx)) {
+		atomic_dec(&kbdev->faults_pending);
+		return;
+	}
+
+	if (unlikely(faulting_as->protected_mode)) {
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Permission failure");
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
+				KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbasep_js_runpool_release_ctx(kbdev, kctx);
+		atomic_dec(&kbdev->faults_pending);
+		return;
+
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
+	if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		unsigned long flags;
+
+		/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+		/* AS transaction begin */
+		mutex_lock(&kbdev->mmu_hw_mutex);
+
+		/* Set the MMU into unmapped mode */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_mmu_disable(kctx);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		mutex_unlock(&kbdev->mmu_hw_mutex);
+		/* AS transaction end */
+
+		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		kbase_mmu_hw_enable_fault(kbdev, faulting_as,
+					 KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+
+		kbase_pm_context_idle(kbdev);
+	}
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+	atomic_dec(&kbdev->faults_pending);
+}
+
+const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code)
+{
+	const char *e;
+
+	switch (exception_code) {
+		/* Non-Fault Status code */
+	case 0x00:
+		e = "NOT_STARTED/IDLE/OK";
+		break;
+	case 0x01:
+		e = "DONE";
+		break;
+	case 0x02:
+		e = "INTERRUPTED";
+		break;
+	case 0x03:
+		e = "STOPPED";
+		break;
+	case 0x04:
+		e = "TERMINATED";
+		break;
+	case 0x08:
+		e = "ACTIVE";
+		break;
+		/* Job exceptions */
+	case 0x40:
+		e = "JOB_CONFIG_FAULT";
+		break;
+	case 0x41:
+		e = "JOB_POWER_FAULT";
+		break;
+	case 0x42:
+		e = "JOB_READ_FAULT";
+		break;
+	case 0x43:
+		e = "JOB_WRITE_FAULT";
+		break;
+	case 0x44:
+		e = "JOB_AFFINITY_FAULT";
+		break;
+	case 0x48:
+		e = "JOB_BUS_FAULT";
+		break;
+	case 0x50:
+		e = "INSTR_INVALID_PC";
+		break;
+	case 0x51:
+		e = "INSTR_INVALID_ENC";
+		break;
+	case 0x52:
+		e = "INSTR_TYPE_MISMATCH";
+		break;
+	case 0x53:
+		e = "INSTR_OPERAND_FAULT";
+		break;
+	case 0x54:
+		e = "INSTR_TLS_FAULT";
+		break;
+	case 0x55:
+		e = "INSTR_BARRIER_FAULT";
+		break;
+	case 0x56:
+		e = "INSTR_ALIGN_FAULT";
+		break;
+	case 0x58:
+		e = "DATA_INVALID_FAULT";
+		break;
+	case 0x59:
+		e = "TILE_RANGE_FAULT";
+		break;
+	case 0x5A:
+		e = "ADDR_RANGE_FAULT";
+		break;
+	case 0x60:
+		e = "OUT_OF_MEMORY";
+		break;
+		/* GPU exceptions */
+	case 0x80:
+		e = "DELAYED_BUS_FAULT";
+		break;
+	case 0x88:
+		e = "SHAREABILITY_FAULT";
+		break;
+		/* MMU exceptions */
+	case 0xC0:
+	case 0xC1:
+	case 0xC2:
+	case 0xC3:
+	case 0xC4:
+	case 0xC5:
+	case 0xC6:
+	case 0xC7:
+		e = "TRANSLATION_FAULT";
+		break;
+	case 0xC8:
+		e = "PERMISSION_FAULT";
+		break;
+	case 0xC9:
+	case 0xCA:
+	case 0xCB:
+	case 0xCC:
+	case 0xCD:
+	case 0xCE:
+	case 0xCF:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "PERMISSION_FAULT";
+		else
+			e = "UNKNOWN";
+		break;
+	case 0xD0:
+	case 0xD1:
+	case 0xD2:
+	case 0xD3:
+	case 0xD4:
+	case 0xD5:
+	case 0xD6:
+	case 0xD7:
+		e = "TRANSTAB_BUS_FAULT";
+		break;
+	case 0xD8:
+		e = "ACCESS_FLAG";
+		break;
+	case 0xD9:
+	case 0xDA:
+	case 0xDB:
+	case 0xDC:
+	case 0xDD:
+	case 0xDE:
+	case 0xDF:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "ACCESS_FLAG";
+		else
+			e = "UNKNOWN";
+		break;
+	case 0xE0:
+	case 0xE1:
+	case 0xE2:
+	case 0xE3:
+	case 0xE4:
+	case 0xE5:
+	case 0xE6:
+	case 0xE7:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "ADDRESS_SIZE_FAULT";
+		else
+			e = "UNKNOWN";
+		break;
+	case 0xE8:
+	case 0xE9:
+	case 0xEA:
+	case 0xEB:
+	case 0xEC:
+	case 0xED:
+	case 0xEE:
+	case 0xEF:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			e = "MEMORY_ATTRIBUTES_FAULT";
+		else
+			e = "UNKNOWN";
+		break;
+	default:
+		e = "UNKNOWN";
+		break;
+	};
+
+	return e;
+}
+
+static const char *access_type_name(struct kbase_device *kbdev,
+		u32 fault_status)
+{
+	switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			return "ATOMIC";
+		else
+			return "UNKNOWN";
+	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
+		return "READ";
+	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
+		return "WRITE";
+	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+		return "EXECUTE";
+	default:
+		WARN_ON(1);
+		return NULL;
+	}
+}
+
+/**
+ * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on.
+ */
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+		struct kbase_as *as, const char *reason_str)
+{
+	unsigned long flags;
+	int exception_type;
+	int access_type;
+	int source_id;
+	int as_no;
+	struct kbase_device *kbdev;
+	struct kbasep_js_device_data *js_devdata;
+
+#if KBASE_GPU_RESET_EN
+	bool reset_status = false;
+#endif
+
+	as_no = as->number;
+	kbdev = kctx->kbdev;
+	js_devdata = &kbdev->js_data;
+
+	/* ASSERT that the context won't leave the runpool */
+	KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0);
+
+	/* decode the fault status */
+	exception_type = as->fault_status & 0xFF;
+	access_type = (as->fault_status >> 8) & 0x3;
+	source_id = (as->fault_status >> 16);
+
+	/* terminal fault, print info about the fault */
+	dev_err(kbdev->dev,
+		"Unhandled Page fault in AS%d at VA 0x%016llX\n"
+		"Reason: %s\n"
+		"raw fault status: 0x%X\n"
+		"decoded fault status: %s\n"
+		"exception type 0x%X: %s\n"
+		"access type 0x%X: %s\n"
+		"source id 0x%X\n"
+		"pid: %d\n",
+		as_no, as->fault_addr,
+		reason_str,
+		as->fault_status,
+		(as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+		exception_type, kbase_exception_name(kbdev, exception_type),
+		access_type, access_type_name(kbdev, as->fault_status),
+		source_id,
+		kctx->pid);
+
+	/* hardware counters dump fault handling */
+	if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) &&
+			(kbdev->hwcnt.backend.state ==
+						KBASE_INSTR_STATE_DUMPING)) {
+		unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+
+		if ((as->fault_addr >= kbdev->hwcnt.addr) &&
+				(as->fault_addr < (kbdev->hwcnt.addr +
+						(num_core_groups * 2048))))
+			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
+	}
+
+	/* Stop the kctx from submitting more jobs and cause it to be scheduled
+	 * out/rescheduled - this will occur on releasing the context's refcount */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_js_clear_submit_allowed(js_devdata, kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
+	 * context can appear in the job slots from this point on */
+	kbase_backend_jm_kill_jobs_from_kctx(kctx);
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+		 * are evicted from the GPU before the switch.
+		 */
+		dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
+		reset_status = kbase_prepare_to_reset_gpu(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+	/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_mmu_disable(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+	/* Clear down the fault */
+	kbase_mmu_hw_clear_fault(kbdev, as,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+	kbase_mmu_hw_enable_fault(kbdev, as,
+			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+
+#if KBASE_GPU_RESET_EN
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+		kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+}
+
+void kbasep_as_do_poke(struct work_struct *work)
+{
+	struct kbase_as *as;
+	struct kbase_device *kbdev;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(work);
+	as = container_of(work, struct kbase_as, poke_work);
+	kbdev = container_of(as, struct kbase_device, as[as->number]);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	/* GPU power will already be active by virtue of the caller holding a JS
+	 * reference on the address space, and will not release it until this worker
+	 * has finished */
+
+	/* Further to the comment above, we know that while this function is running
+	 * the AS will not be released as before the atom is released this workqueue
+	 * is flushed (in kbase_as_poking_timer_release_atom)
+	 */
+
+	/* AS transaction begin */
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	/* Force a uTLB invalidate */
+	kbase_mmu_hw_do_operation(kbdev, as, 0, 0,
+				  AS_COMMAND_UNLOCK, 0);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+	/* AS transaction end */
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (as->poke_refcount &&
+		!(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) {
+		/* Only queue up the timer if we need it, and we're not trying to kill it */
+		hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_as *as;
+	int queue_work_ret;
+
+	KBASE_DEBUG_ASSERT(NULL != timer);
+	as = container_of(timer, struct kbase_as, poke_timer);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+	KBASE_DEBUG_ASSERT(queue_work_ret);
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * Retain the poking timer on an atom's context (if the atom hasn't already
+ * done so), and start the timer (if it's not already started).
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that's running on the GPU.
+ *
+ * The caller must hold hwaccess_lock
+ *
+ * This can be called safely from atomic context
+ */
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (katom->poking)
+		return;
+
+	katom->poking = 1;
+
+	/* It's safe to work on the as/as_nr without an explicit reference,
+	 * because the caller holds the hwaccess_lock, and the atom itself
+	 * was also running and had already taken a reference  */
+	as = &kbdev->as[kctx->as_nr];
+
+	if (++(as->poke_refcount) == 1) {
+		/* First refcount for poke needed: check if not already in flight */
+		if (!as->poke_state) {
+			/* need to start poking */
+			as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT;
+			queue_work(as->poke_wq, &as->poke_work);
+		}
+	}
+}
+
+/**
+ * If an atom holds a poking timer, release it and wait for it to finish
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that still has a JS reference on the context
+ *
+ * This must \b not be called from atomic context, since it can sleep.
+ */
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+	struct kbase_as *as;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	if (!katom->poking)
+		return;
+
+	as = &kbdev->as[kctx->as_nr];
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	KBASE_DEBUG_ASSERT(as->poke_refcount > 0);
+	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+	if (--(as->poke_refcount) == 0) {
+		as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE;
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		hrtimer_cancel(&as->poke_timer);
+		flush_workqueue(as->poke_wq);
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		/* Re-check whether it's still needed */
+		if (as->poke_refcount) {
+			int queue_work_ret;
+			/* Poking still needed:
+			 * - Another retain will not be starting the timer or queueing work,
+			 * because it's still marked as in-flight
+			 * - The hrtimer has finished, and has not started a new timer or
+			 * queued work because it's been marked as killing
+			 *
+			 * So whatever happens now, just queue the work again */
+			as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE);
+			queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+			KBASE_DEBUG_ASSERT(queue_work_ret);
+		} else {
+			/* It isn't - so mark it as not in flight, and not killing */
+			as->poke_state = 0u;
+
+			/* The poke associated with the atom has now finished. If this is
+			 * also the last atom on the context, then we can guarentee no more
+			 * pokes (and thus no more poking register accesses) will occur on
+			 * the context until new atoms are run */
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	katom->poking = 0;
+}
+
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!kctx) {
+		dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n",
+				 kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault",
+				 as->number, as->fault_addr);
+
+		/* Since no ctx was found, the MMU must be disabled. */
+		WARN_ON(as->current_setup.transtab);
+
+		if (kbase_as_has_bus_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as,
+					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
+		} else if (kbase_as_has_page_fault(as)) {
+			kbase_mmu_hw_clear_fault(kbdev, as,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+			kbase_mmu_hw_enable_fault(kbdev, as,
+					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
+		}
+
+#if KBASE_GPU_RESET_EN
+		if (kbase_as_has_bus_fault(as) &&
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+			bool reset_status;
+			/*
+			 * Reset the GPU, like in bus_fault_worker, in case an
+			 * earlier error hasn't been properly cleared by this
+			 * point.
+			 */
+			dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+			reset_status = kbase_prepare_to_reset_gpu_locked(kbdev);
+			if (reset_status)
+				kbase_reset_gpu_locked(kbdev);
+		}
+#endif /* KBASE_GPU_RESET_EN */
+
+		return;
+	}
+
+	if (kbase_as_has_bus_fault(as)) {
+		/*
+		 * hw counters dumping in progress, signal the
+		 * other thread that it failed
+		 */
+		if ((kbdev->hwcnt.kctx == kctx) &&
+		    (kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_DUMPING))
+			kbdev->hwcnt.backend.state =
+						KBASE_INSTR_STATE_FAULT;
+
+		/*
+		 * Stop the kctx from submitting more jobs and cause it
+		 * to be scheduled out/rescheduled when all references
+		 * to it are released
+		 */
+		kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
+			dev_warn(kbdev->dev,
+					"Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
+					as->number, as->fault_addr,
+					as->fault_extra_addr);
+		else
+			dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
+					as->number, as->fault_addr);
+
+		/*
+		 * We need to switch to UNMAPPED mode - but we do this in a
+		 * worker so that we can sleep
+		 */
+		WARN_ON(!queue_work(as->pf_wq, &as->work_busfault));
+		atomic_inc(&kbdev->faults_pending);
+	} else {
+		WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault));
+		atomic_inc(&kbdev->faults_pending);
+	}
+}
+
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev)
+{
+	int i;
+
+	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+		struct kbase_as *as = &kbdev->as[i];
+
+		flush_workqueue(as->pf_wq);
+	}
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
new file mode 100644
index 0000000000000..70d5f2becc718
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
@@ -0,0 +1,124 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file
+ * Interface file for accessing MMU hardware functionality
+ */
+
+/**
+ * @page mali_kbase_mmu_hw_page MMU hardware interface
+ *
+ * @section mali_kbase_mmu_hw_intro_sec Introduction
+ * This module provides an abstraction for accessing the functionality provided
+ * by the midgard MMU and thus allows all MMU HW access to be contained within
+ * one common place and allows for different backends (implementations) to
+ * be provided.
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_H_
+#define _MALI_KBASE_MMU_HW_H_
+
+/* Forward declarations */
+struct kbase_device;
+struct kbase_as;
+struct kbase_context;
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup mali_kbase_mmu_hw  MMU access APIs
+ * @{
+ */
+
+/** @brief MMU fault type descriptor.
+ */
+enum kbase_mmu_fault_type {
+	KBASE_MMU_FAULT_TYPE_UNKNOWN = 0,
+	KBASE_MMU_FAULT_TYPE_PAGE,
+	KBASE_MMU_FAULT_TYPE_BUS,
+	KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED,
+	KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED
+};
+
+/** @brief Configure an address space for use.
+ *
+ * Configure the MMU using the address space details setup in the
+ * @ref kbase_context structure.
+ *
+ * @param[in]  kbdev          kbase device to configure.
+ * @param[in]  as             address space to configure.
+ */
+void kbase_mmu_hw_configure(struct kbase_device *kbdev,
+		struct kbase_as *as);
+
+/** @brief Issue an operation to the MMU.
+ *
+ * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that
+ * is associated with the provided @ref kbase_context over the specified range
+ *
+ * @param[in]  kbdev         kbase device to issue the MMU operation on.
+ * @param[in]  as            address space to issue the MMU operation on.
+ * @param[in]  vpfn          MMU Virtual Page Frame Number to start the
+ *                           operation on.
+ * @param[in]  nr            Number of pages to work on.
+ * @param[in]  type          Operation type (written to ASn_COMMAND).
+ * @param[in]  handling_irq  Is this operation being called during the handling
+ *                           of an interrupt?
+ *
+ * @return Zero if the operation was successful, non-zero otherwise.
+ */
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+		u64 vpfn, u32 nr, u32 type,
+		unsigned int handling_irq);
+
+/** @brief Clear a fault that has been previously reported by the MMU.
+ *
+ * Clear a bus error or page fault that has been reported by the MMU.
+ *
+ * @param[in]  kbdev         kbase device to  clear the fault from.
+ * @param[in]  as            address space to  clear the fault from.
+ * @param[in]  type          The type of fault that needs to be cleared.
+ */
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		enum kbase_mmu_fault_type type);
+
+/** @brief Enable fault that has been previously reported by the MMU.
+ *
+ * After a page fault or bus error has been reported by the MMU these
+ * will be disabled. After these are handled this function needs to be
+ * called to enable the page fault or bus error fault again.
+ *
+ * @param[in]  kbdev         kbase device to again enable the fault from.
+ * @param[in]  as            address space to again enable the fault from.
+ * @param[in]  type          The type of fault that needs to be enabled again.
+ */
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+		enum kbase_mmu_fault_type type);
+
+/** @} *//* end group mali_kbase_mmu_hw */
+/** @} *//* end group base_kbase_api */
+
+#endif	/* _MALI_KBASE_MMU_HW_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
new file mode 100644
index 0000000000000..38ca456477cc1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
@@ -0,0 +1,223 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014, 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+#include "mali_kbase_defs.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+/* For valid ATEs bit 1 = ((level == 3) ? 1 : 0).
+ * Valid ATE entries at level 3 are flagged with the value 3.
+ * Valid ATE entries at level 0-2 are flagged with the value 1.
+ */
+#define ENTRY_IS_ATE_L3		3ULL
+#define ENTRY_IS_ATE_L02	1ULL
+#define ENTRY_IS_INVAL		2ULL
+#define ENTRY_IS_PTE		3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_ACCESS_RW (1ULL << 6)     /* bits 6:7 */
+#define ENTRY_ACCESS_RO (3ULL << 6)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE
+	WRITE_ONCE(*pte, phy);
+#else
+#ifdef CONFIG_64BIT
+	barrier();
+	*pte = phy;
+	barrier();
+#elif defined(CONFIG_ARM)
+	barrier();
+	asm volatile("ldrd r0, [%1]\n\t"
+		     "strd r0, %0\n\t"
+		     : "=m" (*pte)
+		     : "r" (&phy)
+		     : "r0", "r1");
+	barrier();
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_mmu_table *mmut,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register.
+	 */
+	setup->memattr =
+		(AS_MEMATTR_IMPL_DEF_CACHE_POLICY <<
+			(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_FORCE_TO_CACHE_ALL    <<
+			(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) |
+		(AS_MEMATTR_WRITE_ALLOC           <<
+			(AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_IMPL_DEF   <<
+			(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) |
+		(AS_MEMATTR_AARCH64_OUTER_WA         <<
+			(AS_MEMATTR_INDEX_OUTER_WA * 8)) |
+		(AS_MEMATTR_AARCH64_NON_CACHEABLE    <<
+			(AS_MEMATTR_INDEX_NON_CACHEABLE * 8));
+
+	setup->transtab = (u64)mmut->pgd & AS_TRANSTAB_BASE_MASK;
+	setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K;
+}
+
+static void mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+		int as_nr)
+{
+	struct kbase_as *as;
+	struct kbase_mmu_setup *current_setup;
+
+	if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID))
+		return;
+
+	as = &kbdev->as[as_nr];
+	current_setup = &as->current_setup;
+
+	mmu_get_as_setup(mmut, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = 0ULL;
+	current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED;
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate, unsigned int level)
+{
+	if (level == MIDGARD_MMU_BOTTOMLEVEL)
+		return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L3);
+	else
+		return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L02);
+}
+
+static int pte_is_valid(u64 pte, unsigned int level)
+{
+	/* PTEs cannot exist at the bottom level */
+	if (level == MIDGARD_MMU_BOTTOMLEVEL)
+		return false;
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+
+	/* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+	mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+	/* Set access flags - note that AArch64 stage 1 does not support
+	 * write-only access, so we use read/write instead
+	 */
+	if (flags & KBASE_REG_GPU_WR)
+		mmu_flags |= ENTRY_ACCESS_RW;
+	else if (flags & KBASE_REG_GPU_RD)
+		mmu_flags |= ENTRY_ACCESS_RO;
+
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry,
+		struct tagged_addr phy,
+		unsigned long flags,
+		unsigned int level)
+{
+	if (level == MIDGARD_MMU_BOTTOMLEVEL)
+		page_table_entry_set(entry, as_phys_addr_t(phy) |
+				get_mmu_flags(flags) |
+				ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L3);
+	else
+		page_table_entry_set(entry, as_phys_addr_t(phy) |
+				get_mmu_flags(flags) |
+				ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & PAGE_MASK) |
+			ENTRY_ACCESS_BIT | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const aarch64_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate,
+	.flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void)
+{
+	return &aarch64_mode;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
new file mode 100644
index 0000000000000..f6bdf91dc2258
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
@@ -0,0 +1,214 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+#include "mali_kbase_defs.h"
+
+#define ENTRY_TYPE_MASK     3ULL
+#define ENTRY_IS_ATE        1ULL
+#define ENTRY_IS_INVAL      2ULL
+#define ENTRY_IS_PTE        3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2)	/* bits 4:2 */
+#define ENTRY_RD_BIT (1ULL << 6)
+#define ENTRY_WR_BIT (1ULL << 7)
+#define ENTRY_SHARE_BITS (3ULL << 8)	/* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \
+		ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE
+	WRITE_ONCE(*pte, phy);
+#else
+#ifdef CONFIG_64BIT
+	barrier();
+	*pte = phy;
+	barrier();
+#elif defined(CONFIG_ARM)
+	barrier();
+	asm volatile("ldrd r0, [%1]\n\t"
+		     "strd r0, %0\n\t"
+		     : "=m" (*pte)
+		     : "r" (&phy)
+		     : "r0", "r1");
+	barrier();
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+#endif
+}
+
+static void mmu_get_as_setup(struct kbase_mmu_table *mmut,
+		struct kbase_mmu_setup * const setup)
+{
+	/* Set up the required caching policies at the correct indices
+	 * in the memattr register. */
+	setup->memattr =
+		(AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY <<
+		(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+		(AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    <<
+		(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8))    |
+		(AS_MEMATTR_LPAE_WRITE_ALLOC           <<
+		(AS_MEMATTR_INDEX_WRITE_ALLOC * 8))           |
+		(AS_MEMATTR_LPAE_OUTER_IMPL_DEF        <<
+		(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8))        |
+		(AS_MEMATTR_LPAE_OUTER_WA              <<
+		(AS_MEMATTR_INDEX_OUTER_WA * 8))              |
+		0; /* The other indices are unused for now */
+
+	setup->transtab = ((u64)mmut->pgd &
+		((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) |
+		AS_TRANSTAB_LPAE_ADRMODE_TABLE |
+		AS_TRANSTAB_LPAE_READ_INNER;
+
+	setup->transcfg = 0;
+}
+
+static void mmu_update(struct kbase_device *kbdev,
+		struct kbase_mmu_table *mmut,
+		int as_nr)
+{
+	struct kbase_as *as;
+	struct kbase_mmu_setup *current_setup;
+
+	if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID))
+		return;
+
+	as = &kbdev->as[as_nr];
+	current_setup = &as->current_setup;
+
+	mmu_get_as_setup(mmut, current_setup);
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	struct kbase_as * const as = &kbdev->as[as_nr];
+	struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+	current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
+
+	/* Apply the address space setting */
+	kbase_mmu_hw_configure(kbdev, as);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+	if (!(entry & 1))
+		return 0;
+
+	return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate, unsigned int level)
+{
+	return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
+}
+
+static int pte_is_valid(u64 pte, unsigned int level)
+{
+	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+	u64 mmu_flags;
+	unsigned long memattr_idx;
+
+	memattr_idx = KBASE_REG_MEMATTR_VALUE(flags);
+	if (WARN(memattr_idx == AS_MEMATTR_INDEX_NON_CACHEABLE,
+			"Legacy Mode MMU cannot honor GPU non-cachable memory, will use default instead\n"))
+		memattr_idx = AS_MEMATTR_INDEX_DEFAULT;
+	/* store mem_attr index as 4:2, noting that:
+	 * - macro called above ensures 3 bits already
+	 * - all AS_MEMATTR_INDEX_<...> macros only use 3 bits
+	 */
+	mmu_flags = memattr_idx << 2;
+
+	/* write perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
+	/* read perm if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0;
+	/* nx if requested */
+	mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+	if (flags & KBASE_REG_SHARE_BOTH) {
+		/* inner and outer shareable */
+		mmu_flags |= SHARE_BOTH_BITS;
+	} else if (flags & KBASE_REG_SHARE_IN) {
+		/* inner shareable coherency */
+		mmu_flags |= SHARE_INNER_BITS;
+	}
+
+	return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry,
+		struct tagged_addr phy,
+		unsigned long flags,
+		unsigned int level)
+{
+	page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) |
+			     ENTRY_IS_ATE);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+	page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+	page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const lpae_mode = {
+	.update = mmu_update,
+	.get_as_setup = mmu_get_as_setup,
+	.disable_as = mmu_disable_as,
+	.pte_to_phy_addr = pte_to_phy_addr,
+	.ate_is_valid = ate_is_valid,
+	.pte_is_valid = pte_is_valid,
+	.entry_set_ate = entry_set_ate,
+	.entry_set_pte = entry_set_pte,
+	.entry_invalidate = entry_invalidate,
+	.flags = 0
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void)
+{
+	return &lpae_mode;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
new file mode 100644
index 0000000000000..fbb090e6c21f3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
@@ -0,0 +1,124 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2016-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+
+
+/*
+ * This file is included only for type definitions and functions belonging to
+ * specific platform folders. Do not add dependencies with symbols that are
+ * defined somewhere else.
+ */
+#include <mali_kbase_config.h>
+
+#define PLATFORM_CONFIG_RESOURCE_COUNT 4
+#define PLATFORM_CONFIG_IRQ_RES_COUNT  3
+
+static struct platform_device *mali_device;
+
+#ifndef CONFIG_OF
+/**
+ * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources
+ *
+ * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function
+ * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT.
+ * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ.
+ *
+ * @param[in]  io_resource      Input IO resource data
+ * @param[out] linux_resources  Pointer to output array of Linux resource structures
+ */
+static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources)
+{
+	if (!io_resources || !linux_resources) {
+		pr_err("%s: couldn't find proper resources\n", __func__);
+		return;
+	}
+
+	memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource));
+
+	linux_resources[0].start = io_resources->io_memory_region.start;
+	linux_resources[0].end   = io_resources->io_memory_region.end;
+	linux_resources[0].flags = IORESOURCE_MEM;
+
+	linux_resources[1].start = io_resources->job_irq_number;
+	linux_resources[1].end   = io_resources->job_irq_number;
+	linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[2].start = io_resources->mmu_irq_number;
+	linux_resources[2].end   = io_resources->mmu_irq_number;
+	linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+	linux_resources[3].start = io_resources->gpu_irq_number;
+	linux_resources[3].end   = io_resources->gpu_irq_number;
+	linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+}
+#endif /* CONFIG_OF */
+
+int kbase_platform_register(void)
+{
+	struct kbase_platform_config *config;
+#ifndef CONFIG_OF
+	struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT];
+#endif
+	int err;
+
+	config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */
+	if (config == NULL) {
+		pr_err("%s: couldn't get platform config\n", __func__);
+		return -ENODEV;
+	}
+
+	mali_device = platform_device_alloc("mali", 0);
+	if (mali_device == NULL)
+		return -ENOMEM;
+
+#ifndef CONFIG_OF
+	kbasep_config_parse_io_resources(config->io_resources, resources);
+	err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT);
+	if (err) {
+		platform_device_put(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+#endif /* CONFIG_OF */
+
+	err = platform_device_add(mali_device);
+	if (err) {
+		platform_device_unregister(mali_device);
+		mali_device = NULL;
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(kbase_platform_register);
+
+void kbase_platform_unregister(void)
+{
+	if (mali_device)
+		platform_device_unregister(mali_device);
+}
+EXPORT_SYMBOL(kbase_platform_unregister);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.c b/drivers/gpu/arm/midgard/mali_kbase_pm.c
new file mode 100644
index 0000000000000..d5b8c776e74e6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm.c
@@ -0,0 +1,201 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm.c
+ * Base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_vinstr.h>
+
+#include <mali_kbase_pm.h>
+
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags)
+{
+	return kbase_hwaccess_pm_powerup(kbdev, flags);
+}
+
+void kbase_pm_halt(struct kbase_device *kbdev)
+{
+	kbase_hwaccess_pm_halt(kbdev);
+}
+
+void kbase_pm_context_active(struct kbase_device *kbdev)
+{
+	(void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
+}
+
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+	if (kbase_pm_is_suspending(kbdev)) {
+		switch (suspend_handler) {
+		case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE:
+			if (kbdev->pm.active_count != 0)
+				break;
+			/* FALLTHROUGH */
+		case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
+			mutex_unlock(&kbdev->pm.lock);
+			mutex_unlock(&js_devdata->runpool_mutex);
+			return 1;
+
+		case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
+			/* FALLTHROUGH */
+		default:
+			KBASE_DEBUG_ASSERT_MSG(false, "unreachable");
+			break;
+		}
+	}
+	c = ++kbdev->pm.active_count;
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c);
+
+	if (c == 1) {
+		/* First context active: Power on the GPU and any cores requested by
+		 * the policy */
+		kbase_hwaccess_pm_gpu_active(kbdev);
+	}
+#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_DEVFREQ)
+	if (kbdev->ipa.gpu_active_callback)
+		kbdev->ipa.gpu_active_callback(kbdev->ipa.model_data);
+#endif
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_active);
+
+void kbase_pm_context_idle(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	int c;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	c = --kbdev->pm.active_count;
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c);
+
+	KBASE_DEBUG_ASSERT(c >= 0);
+
+	if (c == 0) {
+		/* Last context has gone idle */
+		kbase_hwaccess_pm_gpu_idle(kbdev);
+
+		/* Wake up anyone waiting for this to become 0 (e.g. suspend). The
+		 * waiters must synchronize with us by locking the pm.lock after
+		 * waiting */
+		wake_up(&kbdev->pm.zero_active_count_wait);
+	}
+
+#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_DEVFREQ)
+	/* IPA may be using vinstr, in which case there may be one PM reference
+	 * still held when all other contexts have left the GPU. Inform IPA that
+	 * the GPU is now idle so that vinstr can drop it's reference.
+	 *
+	 * If the GPU was only briefly active then it might have gone idle
+	 * before vinstr has taken a PM reference, meaning that active_count is
+	 * zero. We still need to inform IPA in this case, so that vinstr can
+	 * drop the PM reference and avoid keeping the GPU powered
+	 * unnecessarily.
+	 */
+	if (c <= 1 && kbdev->ipa.gpu_idle_callback)
+		kbdev->ipa.gpu_idle_callback(kbdev->ipa.model_data);
+#endif
+
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_idle);
+
+void kbase_pm_suspend(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Suspend vinstr.
+	 * This call will block until vinstr is suspended. */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
+	mutex_lock(&kbdev->pm.lock);
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+	kbdev->pm.suspending = true;
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* From now on, the active count will drop towards zero. Sometimes, it'll
+	 * go up briefly before going down again. However, once it reaches zero it
+	 * will stay there - guaranteeing that we've idled all pm references */
+
+	/* Suspend job scheduler and associated components, so that it releases all
+	 * the PM active count references */
+	kbasep_js_suspend(kbdev);
+
+	/* Wait for the active count to reach zero. This is not the same as
+	 * waiting for a power down, since not all policies power down when this
+	 * reaches zero. */
+	wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0);
+
+	/* NOTE: We synchronize with anything that was just finishing a
+	 * kbase_pm_context_idle() call by locking the pm.lock below */
+
+	kbase_hwaccess_pm_suspend(kbdev);
+}
+
+void kbase_pm_resume(struct kbase_device *kbdev)
+{
+	/* MUST happen before any pm_context_active calls occur */
+	kbase_hwaccess_pm_resume(kbdev);
+
+	/* Initial active call, to power on the GPU/cores if needed */
+	kbase_pm_context_active(kbdev);
+
+	/* Resume any blocked atoms (which may cause contexts to be scheduled in
+	 * and dependent atoms to run) */
+	kbase_resume_suspended_soft_jobs(kbdev);
+
+	/* Resume the Job Scheduler and associated components, and start running
+	 * atoms */
+	kbasep_js_resume(kbdev);
+
+	/* Matching idle call, to power off the GPU/cores if we didn't actually
+	 * need it and the policy doesn't want it on */
+	kbase_pm_context_idle(kbdev);
+
+	/* Resume vinstr operation */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+}
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.h b/drivers/gpu/arm/midgard/mali_kbase_pm.h
new file mode 100644
index 0000000000000..59a031467c95c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm.h
@@ -0,0 +1,180 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm.h
+ * Power management API definitions
+ */
+
+#ifndef _KBASE_PM_H_
+#define _KBASE_PM_H_
+
+#include "mali_kbase_hwaccess_pm.h"
+
+#define PM_ENABLE_IRQS       0x01
+#define PM_HW_ISSUES_DETECT  0x02
+
+
+/** Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return 0 if the power management framework was successfully initialized.
+ */
+int kbase_pm_init(struct kbase_device *kbdev);
+
+/** Power up GPU after all modules have been initialized and interrupt handlers installed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ *
+ * @param flags     Flags to pass on to kbase_pm_init_hw
+ *
+ * @return 0 if powerup was successful.
+ */
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ * Should ensure that no new interrupts are generated,
+ * but allow any currently running interrupt handlers to complete successfully.
+ * The GPU is forced off by the time this function returns, regardless of
+ * whether or not the active power policy asks for the GPU to be powered off.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_halt(struct kbase_device *kbdev);
+
+/** Terminate the power management framework.
+ *
+ * No power management functions may be called after this
+ * (except @ref kbase_pm_init)
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_term(struct kbase_device *kbdev);
+
+/** Increment the count of active contexts.
+ *
+ * This function should be called when a context is about to submit a job. It informs the active power policy that the
+ * GPU is going to be in use shortly and the policy is expected to start turning on the GPU.
+ *
+ * This function will block until the GPU is available.
+ *
+ * This function ASSERTS if a suspend is occuring/has occurred whilst this is
+ * in use. Use kbase_pm_contect_active_unless_suspending() instead.
+ *
+ * @note a Suspend is only visible to Kernel threads; user-space threads in a
+ * syscall cannot witness a suspend, because they are frozen before the suspend
+ * begins.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_active(struct kbase_device *kbdev);
+
+
+/** Handler codes for doing kbase_pm_context_active_handle_suspend() */
+enum kbase_pm_suspend_handler {
+	/** A suspend is not expected/not possible - this is the same as
+	 * kbase_pm_context_active() */
+	KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE,
+	/** If we're suspending, fail and don't increase the active count */
+	KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE,
+	/** If we're suspending, succeed and allow the active count to increase iff
+	 * it didn't go from 0->1 (i.e., we didn't re-activate the GPU).
+	 *
+	 * This should only be used when there is a bounded time on the activation
+	 * (e.g. guarantee it's going to be idled very soon after) */
+	KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE
+};
+
+/** Suspend 'safe' variant of kbase_pm_context_active()
+ *
+ * If a suspend is in progress, this allows for various different ways of
+ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details.
+ *
+ * We returns a status code indicating whether we're allowed to keep the GPU
+ * active during the suspend, depending on the handler code. If the status code
+ * indicates a failure, the caller must abort whatever operation it was
+ * attempting, and potentially queue it up for after the OS has resumed.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ * @param suspend_handler The handler code for how to handle a suspend that might occur
+ * @return zero     Indicates success
+ * @return non-zero Indicates failure due to the system being suspending/suspended.
+ */
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler);
+
+/** Decrement the reference count of active contexts.
+ *
+ * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power
+ * policy so the calling code should ensure that it does not access the GPU's registers.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_idle(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_active() is in mali_kbase.h, because it is an inline
+ * function
+ */
+
+/**
+ * Suspend the GPU and prevent any further register accesses to it from Kernel
+ * threads.
+ *
+ * This is called in response to an OS suspend event, and calls into the various
+ * kbase components to complete the suspend.
+ *
+ * @note the mechanisms used here rely on all user-space threads being frozen
+ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
+ * the GPU e.g. via atom submission.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Resume the GPU, allow register accesses to it, and resume running atoms on
+ * the GPU.
+ *
+ * This is called in response to an OS resume event, and calls into the various
+ * kbase components to complete the resume.
+ *
+ * @param kbdev     The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_vsync_callback - vsync callback
+ *
+ * @buffer_updated: 1 if a new frame was displayed, 0 otherwise
+ * @data: Pointer to the kbase device as returned by kbase_find_device()
+ *
+ * Callback function used to notify the power management code that a vsync has
+ * occurred on the display.
+ */
+void kbase_pm_vsync_callback(int buffer_updated, void *data);
+
+#endif				/* _KBASE_PM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h b/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
new file mode 100644
index 0000000000000..15bca79fd64d6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file mali_kbase_profiling_gator_api.h
+ * Model interface
+ */
+
+#ifndef _KBASE_PROFILING_GATOR_API_H_
+#define _KBASE_PROFILING_GATOR_API_H_
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control
+ * the frame buffer dumping and s/w counter reporting.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#define FBDUMP_CONTROL_MAX (5)
+#define FBDUMP_CONTROL_MIN FBDUMP_CONTROL_ENABLE
+
+void _mali_profiling_control(u32 action, u32 value);
+
+#endif				/* _KBASE_PROFILING_GATOR_API */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
new file mode 100644
index 0000000000000..763740ec0ae7b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
@@ -0,0 +1,135 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase.h"
+
+#include "mali_kbase_regs_history_debugfs.h"
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+#include <linux/debugfs.h>
+
+
+static int regs_history_size_get(void *data, u64 *val)
+{
+	struct kbase_io_history *const h = data;
+
+	*val = h->size;
+
+	return 0;
+}
+
+static int regs_history_size_set(void *data, u64 val)
+{
+	struct kbase_io_history *const h = data;
+
+	return kbase_io_history_resize(h, (u16)val);
+}
+
+
+DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops,
+		regs_history_size_get,
+		regs_history_size_set,
+		"%llu\n");
+
+
+/**
+ * regs_history_show - show callback for the register access history file.
+ *
+ * @sfile: The debugfs entry
+ * @data: Data associated with the entry
+ *
+ * This function is called to dump all recent accesses to the GPU registers.
+ *
+ * @return 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int regs_history_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_io_history *const h = sfile->private;
+	u16 i;
+	size_t iters;
+	unsigned long flags;
+
+	if (!h->enabled) {
+		seq_puts(sfile, "The register access history is disabled\n");
+		goto out;
+	}
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	iters = (h->size > h->count) ? h->count : h->size;
+	seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters,
+			h->count);
+	for (i = 0; i < iters; ++i) {
+		struct kbase_io_access *io =
+			&h->buf[(h->count - iters + i) % h->size];
+		char const access = (io->addr & 1) ? 'w' : 'r';
+
+		seq_printf(sfile, "%6i: %c: reg 0x%p val %08x\n", i, access,
+				(void *)(io->addr & ~0x1), io->value);
+	}
+
+	spin_unlock_irqrestore(&h->lock, flags);
+
+out:
+	return 0;
+}
+
+
+/**
+ * regs_history_open - open operation for regs_history debugfs file
+ *
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * @return file descriptor
+ */
+static int regs_history_open(struct inode *in, struct file *file)
+{
+	return single_open(file, &regs_history_show, in->i_private);
+}
+
+
+static const struct file_operations regs_history_fops = {
+	.open = &regs_history_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+
+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory,
+			&kbdev->io_history.enabled);
+	debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory,
+			&kbdev->io_history, &regs_history_size_fops);
+	debugfs_create_file("regs_history", S_IRUGO,
+			kbdev->mali_debugfs_directory, &kbdev->io_history,
+			&regs_history_fops);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h
new file mode 100644
index 0000000000000..a0078cb8600df
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h
@@ -0,0 +1,55 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Header file for register access history support via debugfs
+ *
+ * This interface is made available via /sys/kernel/debug/mali#/regs_history*.
+ *
+ * Usage:
+ * - regs_history_enabled: whether recording of register accesses is enabled.
+ *   Write 'y' to enable, 'n' to disable.
+ * - regs_history_size: size of the register history buffer, must be > 0
+ * - regs_history: return the information about last accesses to the registers.
+ */
+
+#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H
+#define _KBASE_REGS_HISTORY_DEBUGFS_H
+
+struct kbase_device;
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/**
+ * kbasep_regs_history_debugfs_init - add debugfs entries for register history
+ *
+ * @kbdev: Pointer to kbase_device containing the register history
+ */
+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbasep_regs_history_debugfs_init CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+#endif  /*_KBASE_REGS_HISTORY_DEBUGFS_H*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase_replay.c b/drivers/gpu/arm/midgard/mali_kbase_replay.c
new file mode 100644
index 0000000000000..92101fec8d5e5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_replay.c
@@ -0,0 +1,1156 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file mali_kbase_replay.c
+ * Replay soft job handlers
+ */
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mem_linux.h>
+
+#define JOB_NOT_STARTED 0
+#define JOB_TYPE_NULL      (1)
+#define JOB_TYPE_VERTEX    (5)
+#define JOB_TYPE_TILER     (7)
+#define JOB_TYPE_FUSED     (8)
+#define JOB_TYPE_FRAGMENT  (9)
+
+#define JOB_HEADER_32_FBD_OFFSET (31*4)
+#define JOB_HEADER_64_FBD_OFFSET (44*4)
+
+#define FBD_POINTER_MASK (~0x3f)
+
+#define SFBD_TILER_OFFSET (48*4)
+
+#define MFBD_TILER_OFFSET       (14*4)
+
+#define FBD_HIERARCHY_WEIGHTS 8
+#define FBD_HIERARCHY_MASK_MASK 0x1fff
+
+#define FBD_TYPE 1
+
+#define HIERARCHY_WEIGHTS 13
+
+#define JOB_HEADER_ID_MAX                 0xffff
+
+#define JOB_SOURCE_ID(status)		(((status) >> 16) & 0xFFFF)
+#define JOB_POLYGON_LIST		(0x03)
+
+struct fragment_job {
+	struct job_descriptor_header header;
+
+	u32 x[2];
+	union {
+		u64 _64;
+		u32 _32;
+	} fragment_fbd;
+};
+
+static void dump_job_head(struct kbase_context *kctx, char *head_str,
+		struct job_descriptor_header *job)
+{
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "%s\n", head_str);
+	dev_dbg(kctx->kbdev->dev,
+			"addr                  = %p\n"
+			"exception_status      = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n"
+			"first_incomplete_task = %x\n"
+			"fault_pointer         = %llx\n"
+			"job_descriptor_size   = %x\n"
+			"job_type              = %x\n"
+			"job_barrier           = %x\n"
+			"_reserved_01          = %x\n"
+			"_reserved_02          = %x\n"
+			"_reserved_03          = %x\n"
+			"_reserved_04/05       = %x,%x\n"
+			"job_index             = %x\n"
+			"dependencies          = %x,%x\n",
+			job, job->exception_status,
+			JOB_SOURCE_ID(job->exception_status),
+			(job->exception_status >> 8) & 0x3,
+			job->exception_status  & 0xFF,
+			job->first_incomplete_task,
+			job->fault_pointer, job->job_descriptor_size,
+			job->job_type, job->job_barrier, job->_reserved_01,
+			job->_reserved_02, job->_reserved_03,
+			job->_reserved_04, job->_reserved_05,
+			job->job_index,
+			job->job_dependency_index_1,
+			job->job_dependency_index_2);
+
+	if (job->job_descriptor_size)
+		dev_dbg(kctx->kbdev->dev, "next               = %llx\n",
+				job->next_job._64);
+	else
+		dev_dbg(kctx->kbdev->dev, "next               = %x\n",
+				job->next_job._32);
+#endif
+}
+
+static int kbasep_replay_reset_sfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct {
+		u32 padding_1[1];
+		u32 flags;
+		u64 padding_2[2];
+		u64 heap_free_address;
+		u32 padding[8];
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + SFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev,
+		"FBD tiler:\n"
+		"flags = %x\n"
+		"heap_free_address = %llx\n",
+		fbd_tiler->flags, fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = fbd_tiler->flags &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev, " Writing hierarchy level %02d (%08x) to %d\n",
+						i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	dev_dbg(kctx->kbdev->dev, "heap_free_address=%llx flags=%x\n",
+			fbd_tiler->heap_free_address, fbd_tiler->flags);
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_replay_reset_mfbd(struct kbase_context *kctx,
+		u64 fbd_address, u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight)
+{
+	struct kbase_vmap_struct map;
+	struct {
+		u32 padding_0;
+		u32 flags;
+		u64 padding_1[2];
+		u64 heap_free_address;
+		u64 padding_2;
+		u32 weights[FBD_HIERARCHY_WEIGHTS];
+	} *fbd_tiler;
+
+	dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+	fbd_tiler = kbase_vmap(kctx, fbd_address + MFBD_TILER_OFFSET,
+			sizeof(*fbd_tiler), &map);
+	if (!fbd_tiler) {
+		dev_err(kctx->kbdev->dev,
+			       "kbasep_replay_reset_fbd: failed to map fbd\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "FBD tiler:\n"
+			"flags = %x\n"
+			"heap_free_address = %llx\n",
+			fbd_tiler->flags,
+			fbd_tiler->heap_free_address);
+#endif
+	if (hierarchy_mask) {
+		u32 weights[HIERARCHY_WEIGHTS];
+		u16 old_hierarchy_mask = (fbd_tiler->flags) &
+						       FBD_HIERARCHY_MASK_MASK;
+		int i, j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (old_hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+				weights[i] = fbd_tiler->weights[j++];
+			} else {
+				weights[i] = default_weight;
+			}
+		}
+
+
+		dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x  New hierarchy mask=%x\n",
+				old_hierarchy_mask, hierarchy_mask);
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+			dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+					i, weights[i]);
+
+		j = 0;
+
+		for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+			if (hierarchy_mask & (1 << i)) {
+				KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+				dev_dbg(kctx->kbdev->dev,
+				" Writing hierarchy level %02d (%08x) to %d\n",
+							     i, weights[i], j);
+
+				fbd_tiler->weights[j++] = weights[i];
+			}
+		}
+
+		for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+			fbd_tiler->weights[j] = 0;
+
+		fbd_tiler->flags = hierarchy_mask | (1 << 16);
+	}
+
+	fbd_tiler->heap_free_address = tiler_heap_free;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of an FBD pointed to by a tiler job
+ *
+ * This performs two functions :
+ * - Set the hierarchy mask
+ * - Reset the tiler free heap address
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] job_header        Address of job header to reset.
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] job_64            true if this job is using 64-bit
+ *                              descriptors
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_tiler_job(struct kbase_context *kctx,
+		u64 job_header,	u64 tiler_heap_free,
+		u16 hierarchy_mask, u32 default_weight,	bool job_64)
+{
+	struct kbase_vmap_struct map;
+	u64 fbd_address;
+
+	if (job_64) {
+		u64 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_64_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	} else {
+		u32 *job_ext;
+
+		job_ext = kbase_vmap(kctx,
+				job_header + JOB_HEADER_32_FBD_OFFSET,
+				sizeof(*job_ext), &map);
+
+		if (!job_ext) {
+			dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+			return -EINVAL;
+		}
+
+		fbd_address = *job_ext;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	if (fbd_address & FBD_TYPE) {
+		return kbasep_replay_reset_mfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	} else {
+		return kbasep_replay_reset_sfbd(kctx,
+						fbd_address & FBD_POINTER_MASK,
+						tiler_heap_free,
+						hierarchy_mask,
+						default_weight);
+	}
+}
+
+/**
+ * @brief Reset the status of a job
+ *
+ * This performs the following functions :
+ *
+ * - Reset the Job Status field of each job to NOT_STARTED.
+ * - Set the Job Type field of any Vertex Jobs to Null Job.
+ * - For any jobs using an FBD, set the Tiler Heap Free field to the value of
+ *   the tiler_heap_free parameter, and set the hierarchy level mask to the
+ *   hier_mask parameter.
+ * - Offset HW dependencies by the hw_job_id_offset parameter
+ * - Set the Perform Job Barrier flag if this job is the first in the chain
+ * - Read the address of the next job header
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in,out] job_header    Address of job header to reset. Set to address
+ *                              of next job header on exit.
+ * @param[in] prev_jc           Previous job chain to link to, if this job is
+ *                              the last in the chain.
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] first_in_chain    true if this job is the first in the chain
+ * @param[in] fragment_chain    true if this job is in the fragment chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_job(struct kbase_context *kctx,
+		u64 *job_header, u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool first_in_chain, bool fragment_chain)
+{
+	struct fragment_job *frag_job;
+	struct job_descriptor_header *job;
+	u64 new_job_header;
+	struct kbase_vmap_struct map;
+
+	frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map);
+	if (!frag_job) {
+		dev_err(kctx->kbdev->dev,
+				 "kbasep_replay_parse_jc: failed to map jc\n");
+		return -EINVAL;
+	}
+	job = &frag_job->header;
+
+	dump_job_head(kctx, "Job header:", job);
+
+	if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) {
+		dev_err(kctx->kbdev->dev, "Job already not started\n");
+		goto out_unmap;
+	}
+	job->exception_status = JOB_NOT_STARTED;
+
+	if (job->job_type == JOB_TYPE_VERTEX)
+		job->job_type = JOB_TYPE_NULL;
+
+	if (job->job_type == JOB_TYPE_FUSED) {
+		dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n");
+		goto out_unmap;
+	}
+
+	if (first_in_chain)
+		job->job_barrier = 1;
+
+	if ((job->job_dependency_index_1 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_dependency_index_2 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_index + hw_job_id_offset) > JOB_HEADER_ID_MAX) {
+		dev_err(kctx->kbdev->dev,
+			     "Job indicies/dependencies out of valid range\n");
+		goto out_unmap;
+	}
+
+	if (job->job_dependency_index_1)
+		job->job_dependency_index_1 += hw_job_id_offset;
+	if (job->job_dependency_index_2)
+		job->job_dependency_index_2 += hw_job_id_offset;
+
+	job->job_index += hw_job_id_offset;
+
+	if (job->job_descriptor_size) {
+		new_job_header = job->next_job._64;
+		if (!job->next_job._64)
+			job->next_job._64 = prev_jc;
+	} else {
+		new_job_header = job->next_job._32;
+		if (!job->next_job._32)
+			job->next_job._32 = prev_jc;
+	}
+	dump_job_head(kctx, "Updated to:", job);
+
+	if (job->job_type == JOB_TYPE_TILER) {
+		bool job_64 = job->job_descriptor_size != 0;
+
+		if (kbasep_replay_reset_tiler_job(kctx, *job_header,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, job_64) != 0)
+			goto out_unmap;
+
+	} else if (job->job_type == JOB_TYPE_FRAGMENT) {
+		u64 fbd_address;
+
+		if (job->job_descriptor_size)
+			fbd_address = frag_job->fragment_fbd._64;
+		else
+			fbd_address = (u64)frag_job->fragment_fbd._32;
+
+		if (fbd_address & FBD_TYPE) {
+			if (kbasep_replay_reset_mfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		} else {
+			if (kbasep_replay_reset_sfbd(kctx,
+					fbd_address & FBD_POINTER_MASK,
+					tiler_heap_free,
+					hierarchy_mask,
+					default_weight) != 0)
+				goto out_unmap;
+		}
+	}
+
+	kbase_vunmap(kctx, &map);
+
+	*job_header = new_job_header;
+
+	return 0;
+
+out_unmap:
+	kbase_vunmap(kctx, &map);
+	return -EINVAL;
+}
+
+/**
+ * @brief Find the highest job ID in a job chain
+ *
+ * @param[in] kctx        Context pointer
+ * @param[in] jc          Job chain start address
+ * @param[out] hw_job_id  Highest job ID in chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx,
+		u64 jc,	u16 *hw_job_id)
+{
+	while (jc) {
+		struct job_descriptor_header *job;
+		struct kbase_vmap_struct map;
+
+		dev_dbg(kctx->kbdev->dev,
+			"kbasep_replay_find_hw_job_id: parsing jc=%llx\n", jc);
+
+		job = kbase_vmap(kctx, jc, sizeof(*job), &map);
+		if (!job) {
+			dev_err(kctx->kbdev->dev, "failed to map jc\n");
+
+			return -EINVAL;
+		}
+
+		if (job->job_index > *hw_job_id)
+			*hw_job_id = job->job_index;
+
+		if (job->job_descriptor_size)
+			jc = job->next_job._64;
+		else
+			jc = job->next_job._32;
+
+		kbase_vunmap(kctx, &map);
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a number of jobs
+ *
+ * This function walks the provided job chain, and calls
+ * kbasep_replay_reset_job for each job. It also links the job chain to the
+ * provided previous job chain.
+ *
+ * The function will fail if any of the jobs passed already have status of
+ * NOT_STARTED.
+ *
+ * @param[in] kctx              Context pointer
+ * @param[in] jc                Job chain to be processed
+ * @param[in] prev_jc           Job chain to be added to. May be NULL
+ * @param[in] tiler_heap_free   The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask    The hierarchy mask to use
+ * @param[in] default_weight    Default hierarchy weight to write when no other
+ *                              weight is given in the FBD
+ * @param[in] hw_job_id_offset  Offset for HW job IDs
+ * @param[in] fragment_chain    true if this chain is the fragment chain
+ *
+ * @return 0 on success, error code otherwise
+ */
+static int kbasep_replay_parse_jc(struct kbase_context *kctx,
+		u64 jc,	u64 prev_jc,
+		u64 tiler_heap_free, u16 hierarchy_mask,
+		u32 default_weight, u16 hw_job_id_offset,
+		bool fragment_chain)
+{
+	bool first_in_chain = true;
+	int nr_jobs = 0;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: jc=%llx hw_job_id=%x\n",
+			jc, hw_job_id_offset);
+
+	while (jc) {
+		dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: parsing jc=%llx\n", jc);
+
+		if (kbasep_replay_reset_job(kctx, &jc, prev_jc,
+				tiler_heap_free, hierarchy_mask,
+				default_weight, hw_job_id_offset,
+				first_in_chain, fragment_chain) != 0)
+			return -EINVAL;
+
+		first_in_chain = false;
+
+		nr_jobs++;
+		if (fragment_chain &&
+		    nr_jobs >= BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT) {
+			dev_err(kctx->kbdev->dev,
+				"Exceeded maximum number of jobs in fragment chain\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Reset the status of a replay job, and set up dependencies
+ *
+ * This performs the actions to allow the replay job to be re-run following
+ * completion of the passed dependency.
+ *
+ * @param[in] katom     The atom to be reset
+ * @param[in] dep_atom  The dependency to be attached to the atom
+ */
+static void kbasep_replay_reset_softjob(struct kbase_jd_atom *katom,
+		struct kbase_jd_atom *dep_atom)
+{
+	katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA);
+	list_add_tail(&katom->dep_item[0], &dep_atom->dep_head[0]);
+}
+
+/**
+ * @brief Allocate an unused katom
+ *
+ * This will search the provided context for an unused katom, and will mark it
+ * as KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * If no atoms are available then the function will fail.
+ *
+ * @param[in] kctx      Context pointer
+ * @return An atom ID, or -1 on failure
+ */
+static int kbasep_allocate_katom(struct kbase_context *kctx)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+	int i;
+
+	for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) {
+		if (jctx->atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) {
+			jctx->atoms[i].status = KBASE_JD_ATOM_STATE_QUEUED;
+			dev_dbg(kctx->kbdev->dev,
+				  "kbasep_allocate_katom: Allocated atom %d\n",
+									    i);
+			return i;
+		}
+	}
+
+	return -1;
+}
+
+/**
+ * @brief Release a katom
+ *
+ * This will mark the provided atom as available, and remove any dependencies.
+ *
+ * For use on error path.
+ *
+ * @param[in] kctx      Context pointer
+ * @param[in] atom_id   ID of atom to release
+ */
+static void kbasep_release_katom(struct kbase_context *kctx, int atom_id)
+{
+	struct kbase_jd_context *jctx = &kctx->jctx;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_release_katom: Released atom %d\n",
+			atom_id);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[0]))
+		list_del(jctx->atoms[atom_id].dep_head[0].next);
+
+	while (!list_empty(&jctx->atoms[atom_id].dep_head[1]))
+		list_del(jctx->atoms[atom_id].dep_head[1].next);
+
+	jctx->atoms[atom_id].status = KBASE_JD_ATOM_STATE_UNUSED;
+}
+
+static void kbasep_replay_create_atom(struct kbase_context *kctx,
+				      struct base_jd_atom_v2 *atom,
+				      int atom_nr,
+				      base_jd_prio prio)
+{
+	atom->nr_extres = 0;
+	atom->extres_list = 0;
+	atom->device_nr = 0;
+	atom->prio = prio;
+	atom->atom_number = atom_nr;
+
+	base_jd_atom_dep_set(&atom->pre_dep[0], 0, BASE_JD_DEP_TYPE_INVALID);
+	base_jd_atom_dep_set(&atom->pre_dep[1], 0, BASE_JD_DEP_TYPE_INVALID);
+
+	atom->udata.blob[0] = 0;
+	atom->udata.blob[1] = 0;
+}
+
+/**
+ * @brief Create two atoms for the purpose of replaying jobs
+ *
+ * Two atoms are allocated and created. The jc pointer is not set at this
+ * stage. The second atom has a dependency on the first. The remaining fields
+ * are set up as follows :
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ *
+ * @param[out] t_atom      Atom to use for tiler jobs
+ * @param[out] f_atom      Atom to use for fragment jobs
+ * @param[in]  prio        Priority of new atom (inherited from replay soft
+ *                         job)
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_create_atoms(struct kbase_context *kctx,
+		struct base_jd_atom_v2 *t_atom,
+		struct base_jd_atom_v2 *f_atom,
+		base_jd_prio prio)
+{
+	int t_atom_nr, f_atom_nr;
+
+	t_atom_nr = kbasep_allocate_katom(kctx);
+	if (t_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		return -EINVAL;
+	}
+
+	f_atom_nr = kbasep_allocate_katom(kctx);
+	if (f_atom_nr < 0) {
+		dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+		kbasep_release_katom(kctx, t_atom_nr);
+		return -EINVAL;
+	}
+
+	kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio);
+	kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio);
+
+	base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr,
+			     BASE_JD_DEP_TYPE_DATA);
+
+	return 0;
+}
+
+#ifdef CONFIG_MALI_DEBUG
+static void payload_dump(struct kbase_context *kctx, base_jd_replay_payload *payload)
+{
+	u64 next;
+
+	dev_dbg(kctx->kbdev->dev, "Tiler jc list :\n");
+	next = payload->tiler_jc_list;
+
+	while (next) {
+		struct kbase_vmap_struct map;
+		base_jd_replay_jc *jc_struct;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &map);
+
+		if (!jc_struct)
+			return;
+
+		dev_dbg(kctx->kbdev->dev, "* jc_struct=%p jc=%llx next=%llx\n",
+				jc_struct, jc_struct->jc, jc_struct->next);
+
+		next = jc_struct->next;
+
+		kbase_vunmap(kctx, &map);
+	}
+}
+#endif
+
+/**
+ * @brief Parse a base_jd_replay_payload provided by userspace
+ *
+ * This will read the payload from userspace, and parse the job chains.
+ *
+ * @param[in] kctx         Context pointer
+ * @param[in] replay_atom  Replay soft job atom
+ * @param[in] t_atom       Atom to use for tiler jobs
+ * @param[in] f_atom       Atom to use for fragment jobs
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_parse_payload(struct kbase_context *kctx,
+					      struct kbase_jd_atom *replay_atom,
+					      struct base_jd_atom_v2 *t_atom,
+					      struct base_jd_atom_v2 *f_atom)
+{
+	base_jd_replay_payload *payload = NULL;
+	u64 next;
+	u64 prev_jc = 0;
+	u16 hw_job_id_offset = 0;
+	int ret = -EINVAL;
+	struct kbase_vmap_struct map;
+
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: replay_atom->jc = %llx sizeof(payload) = %zu\n",
+			replay_atom->jc, sizeof(payload));
+
+	payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map);
+	if (!payload) {
+		dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload);
+	dev_dbg(kctx->kbdev->dev, "Payload structure:\n"
+				  "tiler_jc_list            = %llx\n"
+				  "fragment_jc              = %llx\n"
+				  "tiler_heap_free          = %llx\n"
+				  "fragment_hierarchy_mask  = %x\n"
+				  "tiler_hierarchy_mask     = %x\n"
+				  "hierarchy_default_weight = %x\n"
+				  "tiler_core_req           = %x\n"
+				  "fragment_core_req        = %x\n",
+							payload->tiler_jc_list,
+							  payload->fragment_jc,
+						      payload->tiler_heap_free,
+					      payload->fragment_hierarchy_mask,
+						 payload->tiler_hierarchy_mask,
+					     payload->hierarchy_default_weight,
+						       payload->tiler_core_req,
+						   payload->fragment_core_req);
+	payload_dump(kctx, payload);
+#endif
+	t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER;
+	f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER;
+
+	/* Sanity check core requirements*/
+	if ((t_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_T ||
+	    (f_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_FS ||
+	     t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES ||
+	     f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+
+		int t_atom_type = t_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP;
+		int f_atom_type = f_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC;
+		int t_has_ex_res = t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+		int f_has_ex_res = f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+
+		if (t_atom_type != BASE_JD_REQ_T) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom not a tiler job. Was: 0x%x\n Expected: 0x%x",
+			    t_atom_type, BASE_JD_REQ_T);
+		}
+		if (f_atom_type != BASE_JD_REQ_FS) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom not a fragment shader. Was 0x%x Expected: 0x%x\n",
+			    f_atom_type, BASE_JD_REQ_FS);
+		}
+		if (t_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom has external resources.\n");
+		}
+		if (f_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom has external resources.\n");
+		}
+
+		goto out;
+	}
+
+	/* Process tiler job chains */
+	next = payload->tiler_jc_list;
+	if (!next) {
+		dev_err(kctx->kbdev->dev, "Invalid tiler JC list\n");
+		goto out;
+	}
+
+	while (next) {
+		base_jd_replay_jc *jc_struct;
+		struct kbase_vmap_struct jc_map;
+		u64 jc;
+
+		jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &jc_map);
+
+		if (!jc_struct) {
+			dev_err(kctx->kbdev->dev, "Failed to map jc struct\n");
+			goto out;
+		}
+
+		jc = jc_struct->jc;
+		next = jc_struct->next;
+		if (next)
+			jc_struct->jc = 0;
+
+		kbase_vunmap(kctx, &jc_map);
+
+		if (jc) {
+			u16 max_hw_job_id = 0;
+
+			if (kbasep_replay_find_hw_job_id(kctx, jc,
+					&max_hw_job_id) != 0)
+				goto out;
+
+			if (kbasep_replay_parse_jc(kctx, jc, prev_jc,
+					payload->tiler_heap_free,
+					payload->tiler_hierarchy_mask,
+					payload->hierarchy_default_weight,
+					hw_job_id_offset, false) != 0) {
+				goto out;
+			}
+
+			hw_job_id_offset += max_hw_job_id;
+
+			prev_jc = jc;
+		}
+	}
+	t_atom->jc = prev_jc;
+
+	/* Process fragment job chain */
+	f_atom->jc = payload->fragment_jc;
+	if (kbasep_replay_parse_jc(kctx, payload->fragment_jc, 0,
+			payload->tiler_heap_free,
+			payload->fragment_hierarchy_mask,
+			payload->hierarchy_default_weight, 0,
+			true) != 0) {
+		goto out;
+	}
+
+	if (!t_atom->jc || !f_atom->jc) {
+		dev_err(kctx->kbdev->dev, "Invalid payload\n");
+		goto out;
+	}
+
+	dev_dbg(kctx->kbdev->dev, "t_atom->jc=%llx f_atom->jc=%llx\n",
+			t_atom->jc, f_atom->jc);
+	ret = 0;
+
+out:
+	kbase_vunmap(kctx, &map);
+
+	return ret;
+}
+
+static void kbase_replay_process_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+	struct kbase_context *kctx;
+	struct kbase_jd_context *jctx;
+	bool need_to_try_schedule_context = false;
+
+	struct base_jd_atom_v2 t_atom, f_atom;
+	struct kbase_jd_atom *t_katom, *f_katom;
+	base_jd_prio atom_prio;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kctx = katom->kctx;
+	jctx = &kctx->jctx;
+
+	mutex_lock(&jctx->lock);
+
+	atom_prio = kbasep_js_sched_prio_to_atom_prio(katom->sched_priority);
+
+	if (kbasep_replay_create_atoms(
+			kctx, &t_atom, &f_atom, atom_prio) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	t_katom = &jctx->atoms[t_atom.atom_number];
+	f_katom = &jctx->atoms[f_atom.atom_number];
+
+	if (kbasep_replay_parse_payload(kctx, katom, &t_atom, &f_atom) != 0) {
+		kbasep_release_katom(kctx, t_atom.atom_number);
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	kbasep_replay_reset_softjob(katom, f_katom);
+
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &t_atom, t_katom);
+	if (t_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		kbasep_release_katom(kctx, f_atom.atom_number);
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+	need_to_try_schedule_context |= jd_submit_atom(kctx, &f_atom, f_katom);
+	if (f_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+		dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		goto out;
+	}
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+out:
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_disjoint_state_down(kctx->kbdev);
+
+		need_to_try_schedule_context |= jd_done_nolock(katom, NULL);
+	}
+
+	if (need_to_try_schedule_context)
+		kbase_js_sched_all(kctx->kbdev);
+
+	mutex_unlock(&jctx->lock);
+}
+
+/**
+ * @brief Check job replay fault
+ *
+ * This will read the job payload, checks fault type and source, then decides
+ * whether replay is required.
+ *
+ * @param[in] katom       The atom to be processed
+ * @return  true (success) if replay required or false on failure.
+ */
+static bool kbase_replay_fault_check(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	base_jd_replay_payload *payload;
+	u64 job_header;
+	u64 job_loop_detect;
+	struct job_descriptor_header *job;
+	struct kbase_vmap_struct job_map;
+	struct kbase_vmap_struct map;
+	bool err = false;
+
+	/* Replay job if fault is of type BASE_JD_EVENT_JOB_WRITE_FAULT or
+	 * if force_replay is enabled.
+	 */
+	if (BASE_JD_EVENT_TERMINATED == katom->event_code) {
+		return false;
+	} else if (BASE_JD_EVENT_JOB_WRITE_FAULT == katom->event_code) {
+		return true;
+	} else if (BASE_JD_EVENT_FORCE_REPLAY == katom->event_code) {
+		katom->event_code = BASE_JD_EVENT_DATA_INVALID_FAULT;
+		return true;
+	} else if (BASE_JD_EVENT_DATA_INVALID_FAULT != katom->event_code) {
+		/* No replay for faults of type other than
+		 * BASE_JD_EVENT_DATA_INVALID_FAULT.
+		 */
+		return false;
+	}
+
+	/* Job fault is BASE_JD_EVENT_DATA_INVALID_FAULT, now scan fragment jc
+	 * to find out whether the source of exception is POLYGON_LIST. Replay
+	 * is required if the source of fault is POLYGON_LIST.
+	 */
+	payload = kbase_vmap(kctx, katom->jc, sizeof(*payload), &map);
+	if (!payload) {
+		dev_err(dev, "kbase_replay_fault_check: failed to map payload.\n");
+		return false;
+	}
+
+#ifdef CONFIG_MALI_DEBUG
+	dev_dbg(dev, "kbase_replay_fault_check: payload=%p\n", payload);
+	dev_dbg(dev, "\nPayload structure:\n"
+		     "fragment_jc              = 0x%llx\n"
+		     "fragment_hierarchy_mask  = 0x%x\n"
+		     "fragment_core_req        = 0x%x\n",
+		     payload->fragment_jc,
+		     payload->fragment_hierarchy_mask,
+		     payload->fragment_core_req);
+#endif
+	/* Process fragment job chain */
+	job_header      = (u64) payload->fragment_jc;
+	job_loop_detect = job_header;
+	while (job_header) {
+		job = kbase_vmap(kctx, job_header, sizeof(*job), &job_map);
+		if (!job) {
+			dev_err(dev, "failed to map jc\n");
+			/* unmap payload*/
+			kbase_vunmap(kctx, &map);
+			return false;
+		}
+
+
+		dump_job_head(kctx, "\njob_head structure:\n", job);
+
+		/* Replay only when the polygon list reader caused the
+		 * DATA_INVALID_FAULT */
+		if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) &&
+		   (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) {
+			err = true;
+			kbase_vunmap(kctx, &job_map);
+			break;
+		}
+
+		/* Move on to next fragment job in the list */
+		if (job->job_descriptor_size)
+			job_header = job->next_job._64;
+		else
+			job_header = job->next_job._32;
+
+		kbase_vunmap(kctx, &job_map);
+
+		/* Job chain loop detected */
+		if (job_header == job_loop_detect)
+			break;
+	}
+
+	/* unmap payload*/
+	kbase_vunmap(kctx, &map);
+
+	return err;
+}
+
+
+/**
+ * @brief Process a replay job
+ *
+ * Called from kbase_process_soft_job.
+ *
+ * On exit, if the job has completed, katom->event_code will have been updated.
+ * If the job has not completed, and is replaying jobs, then the atom status
+ * will have been reset to KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * @param[in] katom  The atom to be processed
+ * @return           false if the atom has completed
+ *                   true if the atom is replaying jobs
+ */
+bool kbase_replay_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	/* Don't replay this atom if these issues are not present in the
+	 * hardware */
+	if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11020) &&
+			!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11024)) {
+		dev_dbg(kbdev->dev, "Hardware does not need replay workaround");
+
+		/* Signal failure to userspace */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+
+		return false;
+	}
+
+	if (katom->event_code == BASE_JD_EVENT_DONE) {
+		dev_dbg(kbdev->dev, "Previous job succeeded - not replaying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+		dev_dbg(kbdev->dev, "Not replaying; context is dying\n");
+
+		if (katom->retry_count)
+			kbase_disjoint_state_down(kbdev);
+
+		return false;
+	}
+
+	/* Check job exception type and source before replaying. */
+	if (!kbase_replay_fault_check(katom)) {
+		dev_dbg(kbdev->dev,
+			"Replay cancelled on event %x\n", katom->event_code);
+		/* katom->event_code is already set to the failure code of the
+		 * previous job.
+		 */
+		return false;
+	}
+
+	dev_warn(kbdev->dev, "Replaying jobs retry=%d\n",
+			katom->retry_count);
+
+	katom->retry_count++;
+
+	if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) {
+		dev_err(kbdev->dev, "Replay exceeded limit - failing jobs\n");
+
+		kbase_disjoint_state_down(kbdev);
+
+		/* katom->event_code is already set to the failure code of the
+		   previous job */
+		return false;
+	}
+
+	/* only enter the disjoint state once for the whole time while the replay is ongoing */
+	if (katom->retry_count == 1)
+		kbase_disjoint_state_up(kbdev);
+
+	INIT_WORK(&katom->work, kbase_replay_process_worker);
+	queue_work(kctx->event_workq, &katom->work);
+
+	return true;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_smc.c b/drivers/gpu/arm/midgard/mali_kbase_smc.c
new file mode 100644
index 0000000000000..b53e4bd4af950
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_smc.c
@@ -0,0 +1,88 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+#include <mali_kbase_smc.h>
+
+#include <linux/compiler.h>
+
+/*
+ * This is used to ensure the compiler did actually allocate the register we
+ * asked it for some inline assembly sequences.  Apparently we can't trust the
+ * compiler from one version to another so a bit of paranoia won't hurt.  This
+ * string is meant to be concatenated with the inline asm string and will
+ * cause compilation to stop on mismatch.  (for details, see gcc PR 15089)
+ */
+#define __asmeq(x, y)  ".ifnc " x "," y " ; .err ; .endif\n\t"
+
+static noinline u64 invoke_smc_fid(u64 function_id,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	register u64 x0 asm("x0") = function_id;
+	register u64 x1 asm("x1") = arg0;
+	register u64 x2 asm("x2") = arg1;
+	register u64 x3 asm("x3") = arg2;
+
+	asm volatile(
+			__asmeq("%0", "x0")
+			__asmeq("%1", "x1")
+			__asmeq("%2", "x2")
+			__asmeq("%3", "x3")
+			"smc    #0\n"
+			: "+r" (x0)
+			: "r" (x1), "r" (x2), "r" (x3));
+
+	return x0;
+}
+
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2)
+{
+	/* Is fast call (bit 31 set) */
+	KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL);
+	/* bits 16-23 must be zero for fast calls */
+	KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0);
+
+	return invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2)
+{
+	u32 fid = 0;
+
+	/* Only the six bits allowed should be used. */
+	KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0);
+
+	fid |= SMC_FAST_CALL; /* Bit 31: Fast call */
+	if (smc64)
+		fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */
+	fid |= oen; /* Bit 29:24: OEN */
+	/* Bit 23:16: Must be zero for fast calls */
+	fid |= (function_number); /* Bit 15:0: function number */
+
+	return kbase_invoke_smc_fid(fid, arg0, arg1, arg2);
+}
+
+#endif /* CONFIG_ARM64 */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_smc.h b/drivers/gpu/arm/midgard/mali_kbase_smc.h
new file mode 100644
index 0000000000000..221eb21a8c7f3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_smc.h
@@ -0,0 +1,72 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _KBASE_SMC_H_
+#define _KBASE_SMC_H_
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+
+#define SMC_FAST_CALL (1 << 31)
+#define SMC_64 (1 << 30)
+
+#define SMC_OEN_OFFSET 24
+#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */
+#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET)
+#define SMC_OEN_STD (4 << SMC_OEN_OFFSET)
+
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @fid: The SMC function to call, see SMC Calling convention.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC.
+  */
+u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2);
+
+/**
+  * kbase_invoke_smc_fid - Perform a secure monitor call
+  * @oen: Owning Entity number (SIP, STD etc).
+  * @function_number: The function number within the OEN.
+  * @smc64: use SMC64 calling convention instead of SMC32.
+  * @arg0: First argument to the SMC.
+  * @arg1: Second argument to the SMC.
+  * @arg2: Third argument to the SMC.
+  *
+  * See SMC Calling Convention for details.
+  *
+  * Return: the return value from the SMC call.
+  */
+u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
+		u64 arg0, u64 arg1, u64 arg2);
+
+#endif /* CONFIG_ARM64 */
+
+#endif /* _KBASE_SMC_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
new file mode 100644
index 0000000000000..b774c3b4e7c6a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
@@ -0,0 +1,1679 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#include <asm/cacheflush.h>
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#include <mali_kbase_sync.h>
+#endif
+#include <linux/dma-mapping.h>
+#include <mali_base_kernel.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tlstream.h>
+#include <linux/version.h>
+#include <linux/ktime.h>
+#include <linux/pfn.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/cache.h>
+
+/**
+ * @file mali_kbase_softjobs.c
+ *
+ * This file implements the logic behind software only jobs that are
+ * executed within the driver rather than being handed over to the GPU.
+ */
+
+static void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_add_tail(&katom->queue, &kctx->waiting_soft_jobs);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_del(&katom->queue);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Record the start time of this atom so we could cancel it at
+	 * the right time.
+	 */
+	katom->start_timestamp = ktime_get();
+
+	/* Add the atom to the waiting list before the timer is
+	 * (re)started to make sure that it gets processed.
+	 */
+	kbasep_add_waiting_soft_job(katom);
+
+	/* Schedule timeout of this atom after a period if it is not active */
+	if (!timer_pending(&kctx->soft_job_timeout)) {
+		int timeout_ms = atomic_read(
+				&kctx->kbdev->js_data.soft_job_timeout_ms);
+		mod_timer(&kctx->soft_job_timeout,
+			  jiffies + msecs_to_jiffies(timeout_ms));
+	}
+}
+
+static int kbasep_read_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char *status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*status = *mapped_evt;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_write_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char new_status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	if ((new_status != BASE_JD_SOFT_EVENT_SET) &&
+	    (new_status != BASE_JD_SOFT_EVENT_RESET))
+		return -EINVAL;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*mapped_evt = new_status;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
+{
+	struct kbase_vmap_struct map;
+	void *user_result;
+	struct timespec ts;
+	struct base_dump_cpu_gpu_counters data;
+	u64 system_time;
+	u64 cycle_counter;
+	u64 jc = katom->jc;
+	struct kbase_context *kctx = katom->kctx;
+	int pm_active_err;
+
+	memset(&data, 0, sizeof(data));
+
+	/* Take the PM active reference as late as possible - otherwise, it could
+	 * delay suspend until we process the atom (which may be at the end of a
+	 * long chain of dependencies */
+	pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+	if (pm_active_err) {
+		struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
+
+		/* We're suspended - queue this on the list of suspended jobs
+		 * Use dep_item[1], because dep_item[0] was previously in use
+		 * for 'waiting_soft_jobs'.
+		 */
+		mutex_lock(&js_devdata->runpool_mutex);
+		list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		/* Also adding this to the list of waiting soft job */
+		kbasep_add_waiting_soft_job(katom);
+
+		return pm_active_err;
+	}
+
+	kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time,
+									&ts);
+
+	kbase_pm_context_idle(kctx->kbdev);
+
+	data.sec = ts.tv_sec;
+	data.usec = ts.tv_nsec / 1000;
+	data.system_time = system_time;
+	data.cycle_counter = cycle_counter;
+
+	/* Assume this atom will be cancelled until we know otherwise */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* GPU_WR access is checked on the range for returning the result to
+	 * userspace for the following reasons:
+	 * - security, this is currently how imported user bufs are checked.
+	 * - userspace ddk guaranteed to assume region was mapped as GPU_WR */
+	user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map);
+	if (!user_result)
+		return 0;
+
+	memcpy(user_result, &data, sizeof(data));
+
+	kbase_vunmap(kctx, &map);
+
+	/* Atom was fine - mark it as done */
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	return 0;
+}
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+/* Called by the explicit fence mechanism when a fence wait has completed */
+void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(kctx->kbdev);
+	mutex_unlock(&kctx->jctx.lock);
+}
+#endif
+
+static void kbasep_soft_event_complete_job(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt)
+{
+	int cancel_timer = 1;
+	struct list_head *entry, *tmp;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(
+				entry, struct kbase_jd_atom, queue);
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			if (katom->jc == evt) {
+				list_del(&katom->queue);
+
+				katom->event_code = BASE_JD_EVENT_DONE;
+				INIT_WORK(&katom->work,
+					  kbasep_soft_event_complete_job);
+				queue_work(kctx->jctx.job_done_wq,
+					   &katom->work);
+			} else {
+				/* There are still other waiting jobs, we cannot
+				 * cancel the timer yet.
+				 */
+				cancel_timer = 0;
+			}
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			/* Keep the timer running if fence debug is enabled and
+			 * there are waiting fence jobs.
+			 */
+			cancel_timer = 0;
+			break;
+#endif
+		}
+	}
+
+	if (cancel_timer)
+		del_timer(&kctx->soft_job_timeout);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep;
+
+		list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) {
+			if (dep->status == KBASE_JD_ATOM_STATE_UNUSED ||
+			    dep->status == KBASE_JD_ATOM_STATE_COMPLETED)
+				continue;
+
+			if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					== BASE_JD_REQ_SOFT_FENCE_TRIGGER) {
+				/* Found blocked trigger fence. */
+				struct kbase_sync_fence_info info;
+
+				if (!kbase_sync_fence_in_info_get(dep, &info)) {
+					dev_warn(dev,
+						 "\tVictim trigger atom %d fence [%p] %s: %s\n",
+						 kbase_jd_atom_id(kctx, dep),
+						 info.fence,
+						 info.name,
+						 kbase_sync_status_string(info.status));
+				 }
+			}
+
+			kbase_fence_debug_check_atom(dep);
+		}
+	}
+}
+
+static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = katom->kctx->kbdev->dev;
+	int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms);
+	unsigned long lflags;
+	struct kbase_sync_fence_info info;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+
+	if (kbase_sync_fence_in_info_get(katom, &info)) {
+		/* Fence must have signaled just after timeout. */
+		spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+		return;
+	}
+
+	dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n",
+		 kctx->tgid, kctx->id,
+		 kbase_jd_atom_id(kctx, katom),
+		 info.fence, timeout_ms);
+	dev_warn(dev, "\tGuilty fence [%p] %s: %s\n",
+		 info.fence, info.name,
+		 kbase_sync_status_string(info.status));
+
+	/* Search for blocked trigger atoms */
+	kbase_fence_debug_check_atom(katom);
+
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+
+	kbase_sync_fence_in_dump(katom);
+}
+
+struct kbase_fence_debug_work {
+	struct kbase_jd_atom *katom;
+	struct work_struct work;
+};
+
+static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work)
+{
+	struct kbase_fence_debug_work *w = container_of(work,
+			struct kbase_fence_debug_work, work);
+	struct kbase_jd_atom *katom = w->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_fence_debug_wait_timeout(katom);
+	mutex_unlock(&kctx->jctx.lock);
+
+	kfree(w);
+}
+
+static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_fence_debug_work *work;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Enqueue fence debug worker. Use job_done_wq to get
+	 * debug print ordered with job completion.
+	 */
+	work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC);
+	/* Ignore allocation failure. */
+	if (work) {
+		work->katom = katom;
+		INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker);
+		queue_work(kctx->jctx.job_done_wq, &work->work);
+	}
+}
+#endif /* CONFIG_MALI_FENCE_DEBUG */
+
+void kbasep_soft_job_timeout_worker(struct timer_list *timer)
+{
+	struct kbase_context *kctx = container_of(timer, struct kbase_context,
+			soft_job_timeout);
+	u32 timeout_ms = (u32)atomic_read(
+			&kctx->kbdev->js_data.soft_job_timeout_ms);
+	ktime_t cur_time = ktime_get();
+	bool restarting = false;
+	unsigned long lflags;
+	struct list_head *entry, *tmp;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(entry,
+				struct kbase_jd_atom, queue);
+		s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time,
+					katom->start_timestamp));
+
+		if (elapsed_time < (s64)timeout_ms) {
+			restarting = true;
+			continue;
+		}
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			/* Take it out of the list to ensure that it
+			 * will be cancelled in all cases
+			 */
+			list_del(&katom->queue);
+
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			INIT_WORK(&katom->work, kbasep_soft_event_complete_job);
+			queue_work(kctx->jctx.job_done_wq, &katom->work);
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			kbase_fence_debug_timeout(katom);
+			break;
+#endif
+		}
+	}
+
+	if (restarting)
+		mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms));
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static int kbasep_soft_event_wait(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned char status;
+
+	/* The status of this soft-job is stored in jc */
+	if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return 0;
+	}
+
+	if (status == BASE_JD_SOFT_EVENT_SET)
+		return 0; /* Event already set, nothing to do */
+
+	kbasep_add_waiting_with_timeout(katom);
+
+	return 1;
+}
+
+static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom,
+				     unsigned char new_status)
+{
+	/* Complete jobs waiting on the same event */
+	struct kbase_context *kctx = katom->kctx;
+
+	if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, katom->jc);
+}
+
+/**
+ * kbase_soft_event_update() - Update soft event state
+ * @kctx: Pointer to context
+ * @event: Event to update
+ * @new_status: New status value of event
+ *
+ * Update the event, and wake up any atoms waiting for the event.
+ *
+ * Return: 0 on success, a negative error code on failure.
+ */
+int kbase_soft_event_update(struct kbase_context *kctx,
+			     u64 event,
+			     unsigned char new_status)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->jctx.lock);
+
+	if (kbasep_write_soft_event_status(kctx, event, new_status)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, event);
+
+out:
+	mutex_unlock(&kctx->jctx.lock);
+
+	return err;
+}
+
+static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom)
+{
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer)
+{
+	struct page **pages = buffer->extres_pages;
+	int nr_pages = buffer->nr_extres_pages;
+
+	if (pages) {
+		int i;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *pg = pages[i];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(pages);
+	}
+}
+
+static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers = katom->softjob_data;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+
+	if (!buffers)
+		return;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	for (i = 0; i < nr; i++) {
+		int p;
+		struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc;
+
+		if (!buffers[i].pages)
+			break;
+		for (p = 0; p < buffers[i].nr_pages; p++) {
+			struct page *pg = buffers[i].pages[p];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(buffers[i].pages);
+		if (gpu_alloc) {
+			switch (gpu_alloc->type) {
+			case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+			{
+				free_user_buffer(&buffers[i]);
+				break;
+			}
+			default:
+				/* Nothing to be done. */
+				break;
+			}
+			kbase_mem_phy_alloc_put(gpu_alloc);
+		}
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+	kfree(buffers);
+
+	katom->softjob_data = NULL;
+}
+
+static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers;
+	struct base_jd_debug_copy_buffer *user_buffers = NULL;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+	int ret = 0;
+	void __user *user_structs = (void __user *)(uintptr_t)katom->jc;
+
+	if (!user_structs)
+		return -EINVAL;
+
+	buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL);
+	if (!buffers) {
+		ret = -ENOMEM;
+		goto out_cleanup;
+	}
+	katom->softjob_data = buffers;
+
+	user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL);
+
+	if (!user_buffers) {
+		ret = -ENOMEM;
+		goto out_cleanup;
+	}
+
+	ret = copy_from_user(user_buffers, user_structs,
+			sizeof(*user_buffers)*nr);
+	if (ret) {
+		ret = -EFAULT;
+		goto out_cleanup;
+	}
+
+	for (i = 0; i < nr; i++) {
+		u64 addr = user_buffers[i].address;
+		u64 page_addr = addr & PAGE_MASK;
+		u64 end_page_addr = addr + user_buffers[i].size - 1;
+		u64 last_page_addr = end_page_addr & PAGE_MASK;
+		int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1;
+		int pinned_pages;
+		struct kbase_va_region *reg;
+		struct base_external_resource user_extres;
+
+		if (!addr)
+			continue;
+
+		buffers[i].nr_pages = nr_pages;
+		buffers[i].offset = addr & ~PAGE_MASK;
+		if (buffers[i].offset >= PAGE_SIZE) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+		buffers[i].size = user_buffers[i].size;
+
+		buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *),
+				GFP_KERNEL);
+		if (!buffers[i].pages) {
+			ret = -ENOMEM;
+			goto out_cleanup;
+		}
+
+		pinned_pages = get_user_pages_fast(page_addr,
+					nr_pages,
+					1, /* Write */
+					buffers[i].pages);
+		if (pinned_pages < 0) {
+			ret = pinned_pages;
+			goto out_cleanup;
+		}
+		if (pinned_pages != nr_pages) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		user_extres = user_buffers[i].extres;
+		if (user_extres.ext_resource == 0ULL) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		kbase_gpu_vm_lock(katom->kctx);
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx, user_extres.ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+		if (NULL == reg || NULL == reg->gpu_alloc ||
+				(reg->flags & KBASE_REG_FREE)) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		buffers[i].nr_extres_pages = reg->nr_pages;
+
+		if (reg->nr_pages*PAGE_SIZE != buffers[i].size)
+			dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n");
+
+		switch (reg->gpu_alloc->type) {
+		case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		{
+			struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+			unsigned long nr_pages =
+				alloc->imported.user_buf.nr_pages;
+
+			if (alloc->imported.user_buf.mm != current->mm) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			buffers[i].extres_pages = kcalloc(nr_pages,
+					sizeof(struct page *), GFP_KERNEL);
+			if (!buffers[i].extres_pages) {
+				ret = -ENOMEM;
+				goto out_unlock;
+			}
+
+			ret = get_user_pages_fast(
+					alloc->imported.user_buf.address,
+					nr_pages, 0,
+					buffers[i].extres_pages);
+			if (ret != nr_pages)
+				goto out_unlock;
+			ret = 0;
+			break;
+		}
+		default:
+			/* Nothing to be done. */
+			break;
+		}
+		kbase_gpu_vm_unlock(katom->kctx);
+	}
+	kfree(user_buffers);
+
+	return ret;
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+
+out_cleanup:
+	/* Frees allocated memory for kbase_debug_copy_job struct, including
+	 * members, and sets jc to 0 */
+	kbase_debug_copy_finish(katom);
+	kfree(user_buffers);
+
+	return ret;
+}
+
+void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
+		void *extres_page, struct page **pages, unsigned int nr_pages,
+		unsigned int *target_page_nr, size_t offset, size_t *to_copy)
+{
+	void *target_page = kmap(pages[*target_page_nr]);
+	size_t chunk = PAGE_SIZE-offset;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	chunk = min(chunk, *to_copy);
+
+	memcpy(target_page + offset, extres_page, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+
+	*target_page_nr += 1;
+	if (*target_page_nr >= nr_pages)
+		return;
+
+	target_page = kmap(pages[*target_page_nr]);
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(target_page);
+
+	chunk = min(offset, *to_copy);
+	memcpy(target_page, extres_page + PAGE_SIZE-offset, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+}
+
+int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+		struct kbase_debug_copy_buffer *buf_data)
+{
+	unsigned int i;
+	unsigned int target_page_nr = 0;
+	struct page **pages = buf_data->pages;
+	u64 offset = buf_data->offset;
+	size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE;
+	size_t to_copy = min(extres_size, buf_data->size);
+	struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc;
+	int ret = 0;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	size_t dma_to_copy;
+#endif
+
+	KBASE_DEBUG_ASSERT(pages != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	if (!gpu_alloc) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	switch (gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+	{
+		for (i = 0; i < buf_data->nr_extres_pages; i++) {
+			struct page *pg = buf_data->extres_pages[i];
+			void *extres_page = kmap(pg);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			kunmap(pg);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		break;
+	}
+	break;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf;
+
+		KBASE_DEBUG_ASSERT(dma_buf != NULL);
+		if (dma_buf->size > buf_data->nr_extres_pages * PAGE_SIZE)
+			dev_warn(kctx->kbdev->dev, "External resources buffer size mismatch");
+
+		dma_to_copy = min(dma_buf->size,
+			(size_t)(buf_data->nr_extres_pages * PAGE_SIZE));
+		ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+				0, dma_to_copy,
+#endif
+				DMA_FROM_DEVICE);
+		if (ret)
+			goto out_unlock;
+
+		for (i = 0; i < dma_to_copy/PAGE_SIZE; i++) {
+
+			void *extres_page = dma_buf_kmap(dma_buf, i);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			dma_buf_kunmap(dma_buf, i, extres_page);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+				0, dma_to_copy,
+#endif
+				DMA_FROM_DEVICE);
+		break;
+	}
+#endif
+	default:
+		ret = -EINVAL;
+	}
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+
+}
+
+static int kbase_debug_copy(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers = katom->softjob_data;
+	unsigned int i;
+
+	if (WARN_ON(!buffers))
+		return -EINVAL;
+
+	for (i = 0; i < katom->nr_extres; i++) {
+		int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]);
+
+		if (res)
+			return res;
+	}
+
+	return 0;
+}
+
+#define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7)
+
+int kbasep_jit_alloc_validate(struct kbase_context *kctx,
+					struct base_jit_alloc_info *info)
+{
+	/* If the ID is zero, then fail the job */
+	if (info->id == 0)
+		return -EINVAL;
+
+	/* Sanity check that the PA fits within the VA */
+	if (info->va_pages < info->commit_pages)
+		return -EINVAL;
+
+	/* Ensure the GPU address is correctly aligned */
+	if ((info->gpu_alloc_addr & KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT) != 0)
+		return -EINVAL;
+
+	if (kctx->jit_version == 1) {
+		/* Old JIT didn't have usage_id, max_allocations, bin_id
+		 * or padding, so force them to zero
+		 */
+		info->usage_id = 0;
+		info->max_allocations = 0;
+		info->bin_id = 0;
+		info->flags = 0;
+		memset(info->padding, 0, sizeof(info->padding));
+	} else {
+		int j;
+
+		/* Check padding is all zeroed */
+		for (j = 0; j < sizeof(info->padding); j++) {
+			if (info->padding[j] != 0) {
+				return -EINVAL;
+			}
+		}
+
+		/* No bit other than TILER_ALIGN_TOP shall be set */
+		if (info->flags & ~BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) {
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
+{
+	__user void *data = (__user void *)(uintptr_t) katom->jc;
+	struct base_jit_alloc_info *info;
+	struct kbase_context *kctx = katom->kctx;
+	u32 count;
+	int ret;
+	u32 i;
+
+	/* For backwards compatibility */
+	if (katom->nr_extres == 0)
+		katom->nr_extres = 1;
+	count = katom->nr_extres;
+
+	/* Sanity checks */
+	if (!data || count > kctx->jit_max_allocations ||
+			count > ARRAY_SIZE(kctx->jit_alloc)) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	info = kmalloc_array(count, sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+	if (copy_from_user(info, data, sizeof(*info)*count) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+	katom->softjob_data = info;
+
+	for (i = 0; i < count; i++, info++) {
+		ret = kbasep_jit_alloc_validate(kctx, info);
+		if (ret)
+			goto free_info;
+	}
+
+	katom->jit_blocked = false;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+	list_add_tail(&katom->jit_node, &kctx->jit_atoms_head);
+
+	/*
+	 * Note:
+	 * The provided info->gpu_alloc_addr isn't validated here as
+	 * userland can cache allocations which means that even
+	 * though the region is valid it doesn't represent the
+	 * same thing it used to.
+	 *
+	 * Complete validation of va_pages, commit_pages and extent
+	 * isn't done here as it will be done during the call to
+	 * kbase_mem_alloc.
+	 */
+	return 0;
+
+free_info:
+	kfree(katom->softjob_data);
+	katom->softjob_data = NULL;
+fail:
+	return ret;
+}
+
+static u8 *kbase_jit_free_get_ids(struct kbase_jd_atom *katom)
+{
+	if (WARN_ON((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) !=
+				BASE_JD_REQ_SOFT_JIT_FREE))
+		return NULL;
+
+	return (u8 *) katom->softjob_data;
+}
+
+static void kbase_jit_add_to_pending_alloc_list(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct list_head *target_list_head = NULL;
+	struct kbase_jd_atom *entry;
+
+	list_for_each_entry(entry, &kctx->jit_pending_alloc, queue) {
+		if (katom->age < entry->age) {
+			target_list_head = &entry->queue;
+			break;
+		}
+	}
+
+	if (target_list_head == NULL)
+		target_list_head = &kctx->jit_pending_alloc;
+
+	list_add_tail(&katom->queue, target_list_head);
+}
+
+static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct base_jit_alloc_info *info;
+	struct kbase_va_region *reg;
+	struct kbase_vmap_struct mapping;
+	u64 *ptr, new_addr;
+	u32 count = katom->nr_extres;
+	u32 i;
+
+	if (katom->jit_blocked) {
+		list_del(&katom->queue);
+		katom->jit_blocked = false;
+	}
+
+	info = katom->softjob_data;
+	if (WARN_ON(!info)) {
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return 0;
+	}
+
+	for (i = 0; i < count; i++, info++) {
+		/* The JIT ID is still in use so fail the allocation */
+		if (kctx->jit_alloc[info->id]) {
+			katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+			return 0;
+		}
+	}
+
+	for (i = 0, info = katom->softjob_data; i < count; i++, info++) {
+		if (kctx->jit_alloc[info->id]) {
+			/* The JIT ID is duplicated in this atom. Roll back
+			 * previous allocations and fail.
+			 */
+			u32 j;
+
+			info = katom->softjob_data;
+			for (j = 0; j < i; j++, info++) {
+				kbase_jit_free(kctx, kctx->jit_alloc[info->id]);
+				kctx->jit_alloc[info->id] =
+						(struct kbase_va_region *) -1;
+			}
+
+			katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+			return 0;
+		}
+
+		/* Create a JIT allocation */
+		reg = kbase_jit_allocate(kctx, info);
+		if (!reg) {
+			struct kbase_jd_atom *jit_atom;
+			bool can_block = false;
+
+			lockdep_assert_held(&kctx->jctx.lock);
+
+			jit_atom = list_first_entry(&kctx->jit_atoms_head,
+					struct kbase_jd_atom, jit_node);
+
+			list_for_each_entry(jit_atom, &kctx->jit_atoms_head, jit_node) {
+				if (jit_atom == katom)
+					break;
+
+				if ((jit_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ==
+						BASE_JD_REQ_SOFT_JIT_FREE) {
+					u8 *free_ids = kbase_jit_free_get_ids(jit_atom);
+
+					if (free_ids && *free_ids &&
+						kctx->jit_alloc[*free_ids]) {
+						/* A JIT free which is active and
+						 * submitted before this atom
+						 */
+						can_block = true;
+						break;
+					}
+				}
+			}
+
+			if (!can_block) {
+				/* Mark the failed allocation as well as the
+				 * other un-attempted allocations in the set,
+				 * so we know they are in use even if the
+				 * allocation itself failed.
+				 */
+				for (; i < count; i++, info++) {
+					kctx->jit_alloc[info->id] =
+						(struct kbase_va_region *) -1;
+				}
+
+				katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+				return 0;
+			}
+
+			/* There are pending frees for an active allocation
+			 * so we should wait to see whether they free the
+			 * memory. Add to the list of atoms for which JIT
+			 * allocation is pending.
+			 */
+			kbase_jit_add_to_pending_alloc_list(katom);
+			katom->jit_blocked = true;
+
+			/* Rollback, the whole set will be re-attempted */
+			while (i-- > 0) {
+				info--;
+				kbase_jit_free(kctx, kctx->jit_alloc[info->id]);
+				kctx->jit_alloc[info->id] = NULL;
+			}
+
+			return 1;
+		}
+
+		/* Bind it to the user provided ID. */
+		kctx->jit_alloc[info->id] = reg;
+	}
+
+	for (i = 0, info = katom->softjob_data; i < count; i++, info++) {
+		/*
+		 * Write the address of the JIT allocation to the user provided
+		 * GPU allocation.
+		 */
+		ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+				&mapping);
+		if (!ptr) {
+			/*
+			 * Leave the allocations "live" as the JIT free atom
+			 * will be submitted anyway.
+			 */
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			return 0;
+		}
+
+		reg = kctx->jit_alloc[info->id];
+		new_addr = reg->start_pfn << PAGE_SHIFT;
+		*ptr = new_addr;
+		KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(
+				katom, info->gpu_alloc_addr, new_addr);
+		kbase_vunmap(kctx, &mapping);
+	}
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	return 0;
+}
+
+static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom)
+{
+	struct base_jit_alloc_info *info;
+
+	lockdep_assert_held(&katom->kctx->jctx.lock);
+
+	if (WARN_ON(!katom->softjob_data))
+		return;
+
+	/* Remove atom from jit_atoms_head list */
+	list_del(&katom->jit_node);
+
+	if (katom->jit_blocked) {
+		list_del(&katom->queue);
+		katom->jit_blocked = false;
+	}
+
+	info = katom->softjob_data;
+	/* Free the info structure */
+	kfree(info);
+}
+
+static int kbase_jit_free_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	__user void *data = (__user void *)(uintptr_t) katom->jc;
+	u8 *ids;
+	u32 count = MAX(katom->nr_extres, 1);
+	int ret;
+
+	/* Sanity checks */
+	if (count > ARRAY_SIZE(kctx->jit_alloc)) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	ids = kmalloc_array(count, sizeof(*ids), GFP_KERNEL);
+	if (!ids) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	lockdep_assert_held(&kctx->jctx.lock);
+	katom->softjob_data = ids;
+
+	/* For backwards compatibility */
+	if (katom->nr_extres) {
+		/* Fail the job if there is no list of ids */
+		if (!data) {
+			ret = -EINVAL;
+			goto free_info;
+		}
+
+		if (copy_from_user(ids, data, sizeof(*ids)*count) != 0) {
+			ret = -EINVAL;
+			goto free_info;
+		}
+	} else {
+		katom->nr_extres = 1;
+		*ids = (u8)katom->jc;
+	}
+
+	list_add_tail(&katom->jit_node, &kctx->jit_atoms_head);
+
+	return 0;
+
+free_info:
+	kfree(katom->softjob_data);
+	katom->softjob_data = NULL;
+fail:
+	return ret;
+}
+
+static void kbase_jit_free_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	u8 *ids = kbase_jit_free_get_ids(katom);
+	u32 count = katom->nr_extres;
+	u32 i;
+
+	if (ids == NULL) {
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return;
+	}
+
+	for (i = 0; i < count; i++, ids++) {
+		/*
+		 * If the ID is zero or it is not in use yet then fail the job.
+		 */
+		if ((*ids == 0) || (kctx->jit_alloc[*ids] == NULL)) {
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+			return;
+		}
+	}
+}
+
+static void kbasep_jit_free_finish_worker(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_finish_soft_job(katom);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
+static void kbase_jit_free_finish(struct kbase_jd_atom *katom)
+{
+	struct list_head *i, *tmp;
+	struct kbase_context *kctx = katom->kctx;
+	LIST_HEAD(jit_pending_alloc_list);
+	u8 *ids;
+	size_t j;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	ids = kbase_jit_free_get_ids(katom);
+	if (WARN_ON(ids == NULL)) {
+		return;
+	}
+
+	/* Remove this atom from the kctx->jit_atoms_head list */
+	list_del(&katom->jit_node);
+
+	for (j = 0; j != katom->nr_extres; ++j) {
+		if ((ids[j] != 0) && (kctx->jit_alloc[ids[j]] != NULL)) {
+			/*
+			 * If the ID is valid but the allocation request failed
+			 * still succeed this soft job but don't try and free
+			 * the allocation.
+			 */
+			if (kctx->jit_alloc[ids[j]] != (struct kbase_va_region *) -1)
+				kbase_jit_free(kctx, kctx->jit_alloc[ids[j]]);
+
+			kctx->jit_alloc[ids[j]] = NULL;
+		}
+	}
+	/* Free the list of ids */
+	kfree(ids);
+
+	list_splice_tail_init(&kctx->jit_pending_alloc, &jit_pending_alloc_list);
+
+	list_for_each_safe(i, tmp, &jit_pending_alloc_list) {
+		struct kbase_jd_atom *pending_atom = list_entry(i,
+				struct kbase_jd_atom, queue);
+		if (kbase_jit_allocate_process(pending_atom) == 0) {
+			/* Atom has completed */
+			INIT_WORK(&pending_atom->work,
+					kbasep_jit_free_finish_worker);
+			queue_work(kctx->jctx.job_done_wq, &pending_atom->work);
+		}
+	}
+}
+
+static int kbase_ext_res_prepare(struct kbase_jd_atom *katom)
+{
+	__user struct base_external_resource_list *user_ext_res;
+	struct base_external_resource_list *ext_res;
+	u64 count = 0;
+	size_t copy_size;
+	int ret;
+
+	user_ext_res = (__user struct base_external_resource_list *)
+			(uintptr_t) katom->jc;
+
+	/* Fail the job if there is no info structure */
+	if (!user_ext_res) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Is the number of external resources in range? */
+	if (!count || count > BASE_EXT_RES_COUNT_MAX) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	copy_size = sizeof(*ext_res);
+	copy_size += sizeof(struct base_external_resource) * (count - 1);
+	ext_res = kzalloc(copy_size, GFP_KERNEL);
+	if (!ext_res) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/*
+	 * Overwrite the count with the first value incase it was changed
+	 * after the fact.
+	 */
+	ext_res->count = count;
+
+	katom->softjob_data = ext_res;
+
+	return 0;
+
+free_info:
+	kfree(ext_res);
+fail:
+	return ret;
+}
+
+static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map)
+{
+	struct base_external_resource_list *ext_res;
+	int i;
+	bool failed = false;
+
+	ext_res = katom->softjob_data;
+	if (!ext_res)
+		goto failed_jc;
+
+	kbase_gpu_vm_lock(katom->kctx);
+
+	for (i = 0; i < ext_res->count; i++) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+		if (map) {
+			if (!kbase_sticky_resource_acquire(katom->kctx,
+					gpu_addr))
+				goto failed_loop;
+		} else
+			if (!kbase_sticky_resource_release(katom->kctx, NULL,
+					gpu_addr))
+				failed = true;
+	}
+
+	/*
+	 * In the case of unmap we continue unmapping other resources in the
+	 * case of failure but will always report failure if _any_ unmap
+	 * request fails.
+	 */
+	if (failed)
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	else
+		katom->event_code = BASE_JD_EVENT_DONE;
+
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	return;
+
+failed_loop:
+	while (i > 0) {
+		u64 const gpu_addr = ext_res->ext_res[i - 1].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+
+		kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr);
+
+		--i;
+	}
+
+	katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	kbase_gpu_vm_unlock(katom->kctx);
+
+failed_jc:
+	return;
+}
+
+static void kbase_ext_res_finish(struct kbase_jd_atom *katom)
+{
+	struct base_external_resource_list *ext_res;
+
+	ext_res = katom->softjob_data;
+	/* Free the info structure */
+	kfree(ext_res);
+}
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom)
+{
+	int ret = 0;
+
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(katom);
+
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		ret = kbase_dump_cpu_gpu_time(katom);
+		break;
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		katom->event_code = kbase_sync_fence_out_trigger(katom,
+				katom->event_code == BASE_JD_EVENT_DONE ?
+								0 : -EFAULT);
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+	{
+		ret = kbase_sync_fence_in_wait(katom);
+
+		if (ret == 1) {
+#ifdef CONFIG_MALI_FENCE_DEBUG
+			kbasep_add_waiting_with_timeout(katom);
+#else
+			kbasep_add_waiting_soft_job(katom);
+#endif
+		}
+		break;
+	}
+#endif
+
+	case BASE_JD_REQ_SOFT_REPLAY:
+		ret = kbase_replay_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		ret = kbasep_soft_event_wait(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET);
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET);
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+	{
+		int res = kbase_debug_copy(katom);
+
+		if (res)
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		break;
+	}
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		ret = kbase_jit_allocate_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		kbase_jit_free_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_process(katom, true);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_process(katom, false);
+		break;
+	}
+
+	/* Atom is complete */
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(katom);
+	return ret;
+}
+
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		kbase_sync_fence_in_cancel_wait(katom);
+		break;
+#endif
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		kbasep_soft_event_cancel_job(katom);
+		break;
+	default:
+		/* This soft-job doesn't support cancellation! */
+		KBASE_DEBUG_ASSERT(0);
+	}
+}
+
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		{
+			if (!IS_ALIGNED(katom->jc, cache_line_size()))
+				return -EINVAL;
+		}
+		break;
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		{
+			struct base_fence fence;
+			int fd;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			fd = kbase_sync_fence_out_create(katom,
+							 fence.basep.stream_fd);
+			if (fd < 0)
+				return -EINVAL;
+
+			fence.basep.fd = fd;
+			if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) {
+				kbase_sync_fence_out_remove(katom);
+				kbase_sync_fence_close_fd(fd);
+				fence.basep.fd = -EINVAL;
+				return -EINVAL;
+			}
+		}
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		{
+			struct base_fence fence;
+			int ret;
+
+			if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+				return -EINVAL;
+
+			/* Get a reference to the fence object */
+			ret = kbase_sync_fence_in_from_fd(katom,
+							  fence.basep.fd);
+			if (ret < 0)
+				return ret;
+
+#ifdef CONFIG_MALI_DMA_FENCE
+			/*
+			 * Set KCTX_NO_IMPLICIT_FENCE in the context the first
+			 * time a soft fence wait job is observed. This will
+			 * prevent the implicit dma-buf fence to conflict with
+			 * the Android native sync fences.
+			 */
+			if (!kbase_ctx_flag(katom->kctx, KCTX_NO_IMPLICIT_SYNC))
+				kbase_ctx_flag_set(katom->kctx, KCTX_NO_IMPLICIT_SYNC);
+#endif /* CONFIG_MALI_DMA_FENCE */
+		}
+		break;
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		return kbase_jit_allocate_prepare(katom);
+	case BASE_JD_REQ_SOFT_REPLAY:
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		return kbase_jit_free_prepare(katom);
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		if (katom->jc == 0)
+			return -EINVAL;
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		return kbase_debug_copy_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		return kbase_ext_res_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		return kbase_ext_res_prepare(katom);
+	default:
+		/* Unsupported soft-job */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void kbase_finish_soft_job(struct kbase_jd_atom *katom)
+{
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+		/* Nothing to do */
+		break;
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+		/* If fence has not yet been signaled, do it now */
+		kbase_sync_fence_out_trigger(katom, katom->event_code ==
+				BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+		break;
+	case BASE_JD_REQ_SOFT_FENCE_WAIT:
+		/* Release katom's reference to fence object */
+		kbase_sync_fence_in_remove(katom);
+		break;
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		kbase_debug_copy_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		kbase_jit_allocate_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		kbase_jit_free_finish(katom);
+		break;
+	}
+}
+
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
+{
+	LIST_HEAD(local_suspended_soft_jobs);
+	struct kbase_jd_atom *tmp_iter;
+	struct kbase_jd_atom *katom_iter;
+	struct kbasep_js_device_data *js_devdata;
+	bool resched = false;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	js_devdata = &kbdev->js_data;
+
+	/* Move out the entire list */
+	mutex_lock(&js_devdata->runpool_mutex);
+	list_splice_init(&js_devdata->suspended_soft_jobs_list,
+			&local_suspended_soft_jobs);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/*
+	 * Each atom must be detached from the list and ran separately -
+	 * it could be re-added to the old list, but this is unlikely
+	 */
+	list_for_each_entry_safe(katom_iter, tmp_iter,
+			&local_suspended_soft_jobs, dep_item[1]) {
+		struct kbase_context *kctx = katom_iter->kctx;
+
+		mutex_lock(&kctx->jctx.lock);
+
+		/* Remove from the global list */
+		list_del(&katom_iter->dep_item[1]);
+		/* Remove from the context's list of waiting soft jobs */
+		kbasep_remove_waiting_soft_job(katom_iter);
+
+		if (kbase_process_soft_job(katom_iter) == 0) {
+			kbase_finish_soft_job(katom_iter);
+			resched |= jd_done_nolock(katom_iter, NULL);
+		} else {
+			KBASE_DEBUG_ASSERT((katom_iter->core_req &
+					BASE_JD_REQ_SOFT_JOB_TYPE)
+					!= BASE_JD_REQ_SOFT_REPLAY);
+		}
+
+		mutex_unlock(&kctx->jctx.lock);
+	}
+
+	if (resched)
+		kbase_js_sched_all(kbdev);
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_strings.c b/drivers/gpu/arm/midgard/mali_kbase_strings.c
new file mode 100644
index 0000000000000..22caa4a6d8147
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_strings.c
@@ -0,0 +1,28 @@
+ /*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#include "mali_kbase_strings.h"
+
+#define KBASE_DRV_NAME "mali"
+#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline"
+
+const char kbase_drv_name[] = KBASE_DRV_NAME;
+const char kbase_timeline_name[] = KBASE_TIMELINE_NAME;
diff --git a/drivers/gpu/arm/midgard/mali_kbase_strings.h b/drivers/gpu/arm/midgard/mali_kbase_strings.h
new file mode 100644
index 0000000000000..d2f1825314fe7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_strings.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+extern const char kbase_drv_name[];
+extern const char kbase_timeline_name[];
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.h b/drivers/gpu/arm/midgard/mali_kbase_sync.h
new file mode 100644
index 0000000000000..70557dd5b33f2
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync.h
@@ -0,0 +1,212 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file mali_kbase_sync.h
+ *
+ * This file contains our internal "API" for explicit fences.
+ * It hides the implementation details of the actual explicit fence mechanism
+ * used (Android fences or sync file with DMA fences).
+ */
+
+#ifndef MALI_KBASE_SYNC_H
+#define MALI_KBASE_SYNC_H
+
+#include <linux/syscalls.h>
+#ifdef CONFIG_SYNC
+#include <sync.h>
+#endif
+#ifdef CONFIG_SYNC_FILE
+#include "mali_kbase_fence_defs.h"
+#include <linux/sync_file.h>
+#endif
+
+#include "mali_kbase.h"
+
+/**
+ * struct kbase_sync_fence_info - Information about a fence
+ * @fence: Pointer to fence (type is void*, as underlaying struct can differ)
+ * @name: The name given to this fence when it was created
+ * @status: < 0 means error, 0 means active, 1 means signaled
+ *
+ * Use kbase_sync_fence_in_info_get() or kbase_sync_fence_out_info_get()
+ * to get the information.
+ */
+struct kbase_sync_fence_info {
+	void *fence;
+	char name[32];
+	int status;
+};
+
+/**
+ * kbase_sync_fence_stream_create() - Create a stream object
+ * @name: Name of stream (only used to ease debugging/visualization)
+ * @out_fd: A file descriptor representing the created stream object
+ *
+ * Can map down to a timeline implementation in some implementations.
+ * Exposed as a file descriptor.
+ * Life-time controlled via the file descriptor:
+ * - dup to add a ref
+ * - close to remove a ref
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_stream_create(const char *name, int *const out_fd);
+
+/**
+ * kbase_sync_fence_out_create Create an explicit output fence to specified atom
+ * @katom: Atom to assign the new explicit fence to
+ * @stream_fd: File descriptor for stream object to create fence on
+ *
+ * return: Valid file descriptor to fence or < 0 on error
+ */
+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd);
+
+/**
+ * kbase_sync_fence_in_from_fd() Assigns an existing fence to specified atom
+ * @katom: Atom to assign the existing explicit fence to
+ * @fd: File descriptor to an existing fence
+ *
+ * Assigns an explicit input fence to atom.
+ * This can later be waited for by calling @kbase_sync_fence_in_wait
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd);
+
+/**
+ * kbase_sync_fence_validate() - Validate a fd to be a valid fence
+ * @fd: File descriptor to check
+ *
+ * This function is only usable to catch unintentional user errors early,
+ * it does not stop malicious code changing the fd after this function returns.
+ *
+ * return 0: if fd is for a valid fence, < 0 if invalid
+ */
+int kbase_sync_fence_validate(int fd);
+
+/**
+ * kbase_sync_fence_out_trigger - Signal explicit output fence attached on katom
+ * @katom: Atom with an explicit fence to signal
+ * @result: < 0 means signal with error, 0 >= indicates success
+ *
+ * Signal output fence attached on katom and remove the fence from the atom.
+ *
+ * return: The "next" event code for atom, typically JOB_CANCELLED or EVENT_DONE
+ */
+enum base_jd_event_code
+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result);
+
+/**
+ * kbase_sync_fence_in_wait() - Wait for explicit input fence to be signaled
+ * @katom: Atom with explicit fence to wait for
+ *
+ * If the fence is already signaled, then 0 is returned, and the caller must
+ * continue processing of the katom.
+ *
+ * If the fence isn't already signaled, then this kbase_sync framework will
+ * take responsibility to continue the processing once the fence is signaled.
+ *
+ * return: 0 if already signaled, otherwise 1
+ */
+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_in_cancel_wait() - Cancel explicit input fence waits
+ * @katom: Atom to cancel wait for
+ *
+ * This function is fully responsible for continuing processing of this atom
+ * (remove_waiting_soft_job + finish_soft_job + jd_done + js_sched_all)
+ */
+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_in_remove() - Remove the input fence from the katom
+ * @katom: Atom to remove explicit input fence for
+ *
+ * This will also release the corresponding reference.
+ */
+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_out_remove() - Remove the output fence from the katom
+ * @katom: Atom to remove explicit output fence for
+ *
+ * This will also release the corresponding reference.
+ */
+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_sync_fence_close_fd() - Close a file descriptor representing a fence
+ * @fd: File descriptor to close
+ */
+static inline void kbase_sync_fence_close_fd(int fd)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0)
+	ksys_close(fd);
+#else
+	sys_close(fd);
+#endif
+}
+
+/**
+ * kbase_sync_fence_in_info_get() - Retrieves information about input fence
+ * @katom: Atom to get fence information from
+ * @info: Struct to be filled with fence information
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info);
+
+/**
+ * kbase_sync_fence_out_info_get() - Retrieves information about output fence
+ * @katom: Atom to get fence information from
+ * @info: Struct to be filled with fence information
+ *
+ * return: 0 on success, < 0 on error
+ */
+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
+				  struct kbase_sync_fence_info *info);
+
+/**
+ * kbase_sync_status_string() - Get string matching @status
+ * @status: Value of fence status.
+ *
+ * return: Pointer to string describing @status.
+ */
+const char *kbase_sync_status_string(int status);
+
+/*
+ * Internal worker used to continue processing of atom.
+ */
+void kbase_sync_fence_wait_worker(struct work_struct *data);
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+/**
+ * kbase_sync_fence_in_dump() Trigger a debug dump of atoms input fence state
+ * @katom: Atom to trigger fence debug dump for
+ */
+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom);
+#endif
+
+#endif /* MALI_KBASE_SYNC_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_android.c b/drivers/gpu/arm/midgard/mali_kbase_sync_android.c
new file mode 100644
index 0000000000000..75940fb08a05d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync_android.c
@@ -0,0 +1,542 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Code for supporting explicit Android fences (CONFIG_SYNC)
+ * Known to be good for kernels 4.5 and earlier.
+ * Replaced with CONFIG_SYNC_FILE for 4.9 and later kernels
+ * (see mali_kbase_sync_file.c)
+ */
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include "sync.h"
+#include <mali_kbase.h>
+#include <mali_kbase_sync.h>
+
+struct mali_sync_timeline {
+	struct sync_timeline timeline;
+	atomic_t counter;
+	atomic_t signaled;
+};
+
+struct mali_sync_pt {
+	struct sync_pt pt;
+	int order;
+	int result;
+};
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+/* For backwards compatibility with kernels before 3.17. After 3.17
+ * sync_pt_parent is included in the kernel. */
+static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt)
+{
+	return pt->parent;
+}
+#endif
+
+static struct mali_sync_timeline *to_mali_sync_timeline(
+						struct sync_timeline *timeline)
+{
+	return container_of(timeline, struct mali_sync_timeline, timeline);
+}
+
+static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+	return container_of(pt, struct mali_sync_pt, pt);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_pt *new_mpt;
+	struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt),
+						sizeof(struct mali_sync_pt));
+
+	if (!new_pt)
+		return NULL;
+
+	new_mpt = to_mali_sync_pt(new_pt);
+	new_mpt->order = mpt->order;
+	new_mpt->result = mpt->result;
+
+	return new_pt;
+}
+
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(
+							sync_pt_parent(pt));
+	int result = mpt->result;
+
+	int diff = atomic_read(&mtl->signaled) - mpt->order;
+
+	if (diff >= 0)
+		return (result < 0) ? result : 1;
+
+	return 0;
+}
+
+static int timeline_compare(struct sync_pt *a, struct sync_pt *b)
+{
+	struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt);
+	struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt);
+
+	int diff = ma->order - mb->order;
+
+	if (diff == 0)
+		return 0;
+
+	return (diff < 0) ? -1 : 1;
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str,
+			       int size)
+{
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline);
+
+	snprintf(str, size, "%d", atomic_read(&mtl->signaled));
+}
+
+static void pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+
+	snprintf(str, size, "%d(%d)", mpt->order, mpt->result);
+}
+
+static struct sync_timeline_ops mali_timeline_ops = {
+	.driver_name = "Mali",
+	.dup = timeline_dup,
+	.has_signaled = timeline_has_signaled,
+	.compare = timeline_compare,
+	.timeline_value_str = timeline_value_str,
+	.pt_value_str       = pt_value_str,
+};
+
+/* Allocates a timeline for Mali
+ *
+ * One timeline should be allocated per API context.
+ */
+static struct sync_timeline *mali_sync_timeline_alloc(const char *name)
+{
+	struct sync_timeline *tl;
+	struct mali_sync_timeline *mtl;
+
+	tl = sync_timeline_create(&mali_timeline_ops,
+				  sizeof(struct mali_sync_timeline), name);
+	if (!tl)
+		return NULL;
+
+	/* Set the counter in our private struct */
+	mtl = to_mali_sync_timeline(tl);
+	atomic_set(&mtl->counter, 0);
+	atomic_set(&mtl->signaled, 0);
+
+	return tl;
+}
+
+static int kbase_stream_close(struct inode *inode, struct file *file)
+{
+	struct sync_timeline *tl;
+
+	tl = (struct sync_timeline *)file->private_data;
+	sync_timeline_destroy(tl);
+	return 0;
+}
+
+static const struct file_operations stream_fops = {
+	.owner = THIS_MODULE,
+	.release = kbase_stream_close,
+};
+
+int kbase_sync_fence_stream_create(const char *name, int *const out_fd)
+{
+	struct sync_timeline *tl;
+
+	if (!out_fd)
+		return -EINVAL;
+
+	tl = mali_sync_timeline_alloc(name);
+	if (!tl)
+		return -EINVAL;
+
+	*out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY|O_CLOEXEC);
+
+	if (*out_fd < 0) {
+		sync_timeline_destroy(tl);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Allocates a sync point within the timeline.
+ *
+ * The timeline must be the one allocated by kbase_sync_timeline_alloc
+ *
+ * Sync points must be triggered in *exactly* the same order as they are
+ * allocated.
+ */
+static struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent)
+{
+	struct sync_pt *pt = sync_pt_create(parent,
+					    sizeof(struct mali_sync_pt));
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent);
+	struct mali_sync_pt *mpt;
+
+	if (!pt)
+		return NULL;
+
+	mpt = to_mali_sync_pt(pt);
+	mpt->order = atomic_inc_return(&mtl->counter);
+	mpt->result = 0;
+
+	return pt;
+}
+
+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd)
+{
+	struct sync_timeline *tl;
+	struct sync_pt *pt;
+	struct sync_fence *fence;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+	struct files_struct *files;
+	struct fdtable *fdt;
+#endif
+	int fd;
+	struct file *tl_file;
+
+	tl_file = fget(tl_fd);
+	if (tl_file == NULL)
+		return -EBADF;
+
+	if (tl_file->f_op != &stream_fops) {
+		fd = -EBADF;
+		goto out;
+	}
+
+	tl = tl_file->private_data;
+
+	pt = kbase_sync_pt_alloc(tl);
+	if (!pt) {
+		fd = -EFAULT;
+		goto out;
+	}
+
+	fence = sync_fence_create("mali_fence", pt);
+	if (!fence) {
+		sync_pt_free(pt);
+		fd = -EFAULT;
+		goto out;
+	}
+
+	/* from here the fence owns the sync_pt */
+
+	/* create a fd representing the fence */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+#else
+	fd = get_unused_fd();
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+
+	files = current->files;
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	__set_close_on_exec(fd, fdt);
+#else
+	FD_SET(fd, fdt->close_on_exec);
+#endif
+	spin_unlock(&files->file_lock);
+#endif  /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+
+	/* bind fence to the new fd */
+	sync_fence_install(fence, fd);
+
+	katom->fence = sync_fence_fdget(fd);
+	if (katom->fence == NULL) {
+		/* The only way the fence can be NULL is if userspace closed it
+		 * for us, so we don't need to clear it up */
+		fd = -EINVAL;
+		goto out;
+	}
+
+out:
+	fput(tl_file);
+
+	return fd;
+}
+
+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd)
+{
+	katom->fence = sync_fence_fdget(fd);
+	return katom->fence ? 0 : -ENOENT;
+}
+
+int kbase_sync_fence_validate(int fd)
+{
+	struct sync_fence *fence;
+
+	fence = sync_fence_fdget(fd);
+	if (!fence)
+		return -EINVAL;
+
+	sync_fence_put(fence);
+	return 0;
+}
+
+/* Returns true if the specified timeline is allocated by Mali */
+static int kbase_sync_timeline_is_ours(struct sync_timeline *timeline)
+{
+	return timeline->ops == &mali_timeline_ops;
+}
+
+/* Signals a particular sync point
+ *
+ * Sync points must be triggered in *exactly* the same order as they are
+ * allocated.
+ *
+ * If they are signaled in the wrong order then a message will be printed in
+ * debug builds and otherwise attempts to signal order sync_pts will be ignored.
+ *
+ * result can be negative to indicate error, any other value is interpreted as
+ * success.
+ */
+static void kbase_sync_signal_pt(struct sync_pt *pt, int result)
+{
+	struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(
+							sync_pt_parent(pt));
+	int signaled;
+	int diff;
+
+	mpt->result = result;
+
+	do {
+		signaled = atomic_read(&mtl->signaled);
+
+		diff = signaled - mpt->order;
+
+		if (diff > 0) {
+			/* The timeline is already at or ahead of this point.
+			 * This should not happen unless userspace has been
+			 * signaling fences out of order, so warn but don't
+			 * violate the sync_pt API.
+			 * The warning is only in debug builds to prevent
+			 * a malicious user being able to spam dmesg.
+			 */
+#ifdef CONFIG_MALI_DEBUG
+			pr_err("Fences were triggered in a different order to allocation!");
+#endif				/* CONFIG_MALI_DEBUG */
+			return;
+		}
+	} while (atomic_cmpxchg(&mtl->signaled,
+				signaled, mpt->order) != signaled);
+}
+
+enum base_jd_event_code
+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result)
+{
+	struct sync_pt *pt;
+	struct sync_timeline *timeline;
+
+	if (!katom->fence)
+		return BASE_JD_EVENT_JOB_CANCELLED;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	if (!list_is_singular(&katom->fence->pt_list_head)) {
+#else
+	if (katom->fence->num_fences != 1) {
+#endif
+		/* Not exactly one item in the list - so it didn't (directly)
+		 * come from us */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	pt = list_first_entry(&katom->fence->pt_list_head,
+			      struct sync_pt, pt_list);
+#else
+	pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base);
+#endif
+	timeline = sync_pt_parent(pt);
+
+	if (!kbase_sync_timeline_is_ours(timeline)) {
+		/* Fence has a sync_pt which isn't ours! */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	kbase_sync_signal_pt(pt, result);
+
+	sync_timeline_signal(timeline);
+
+	kbase_sync_fence_out_remove(katom);
+
+	return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+static inline int kbase_fence_get_status(struct sync_fence *fence)
+{
+	if (!fence)
+		return -ENOENT;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	return fence->status;
+#else
+	return atomic_read(&fence->status);
+#endif
+}
+
+static void kbase_fence_wait_callback(struct sync_fence *fence,
+				      struct sync_fence_waiter *waiter)
+{
+	struct kbase_jd_atom *katom = container_of(waiter,
+					struct kbase_jd_atom, sync_waiter);
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Propagate the fence status to the atom.
+	 * If negative then cancel this atom and its dependencies.
+	 */
+	if (kbase_fence_get_status(fence) < 0)
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* To prevent a potential deadlock we schedule the work onto the
+	 * job_done_wq workqueue
+	 *
+	 * The issue is that we may signal the timeline while holding
+	 * kctx->jctx.lock and the callbacks are run synchronously from
+	 * sync_timeline_signal. So we simply defer the work.
+	 */
+
+	INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom)
+{
+	int ret;
+
+	sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback);
+
+	ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter);
+
+	if (ret == 1) {
+		/* Already signaled */
+		return 0;
+	}
+
+	if (ret < 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		/* We should cause the dependent jobs in the bag to be failed,
+		 * to do this we schedule the work queue to complete this job */
+		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+	return 1;
+}
+
+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
+{
+	if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) {
+		/* The wait wasn't cancelled - leave the cleanup for
+		 * kbase_fence_wait_callback */
+		return;
+	}
+
+	/* Wait was cancelled - zap the atoms */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->fence) {
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+	}
+}
+
+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->fence) {
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+	}
+}
+
+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+	if (!katom->fence)
+		return -ENOENT;
+
+	info->fence = katom->fence;
+	info->status = kbase_fence_get_status(katom->fence);
+	strlcpy(info->name, katom->fence->name, sizeof(info->name));
+
+	return 0;
+}
+
+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+	if (!katom->fence)
+		return -ENOENT;
+
+	info->fence = katom->fence;
+	info->status = kbase_fence_get_status(katom->fence);
+	strlcpy(info->name, katom->fence->name, sizeof(info->name));
+
+	return 0;
+}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom)
+{
+	/* Dump out the full state of all the Android sync fences.
+	 * The function sync_dump() isn't exported to modules, so force
+	 * sync_fence_wait() to time out to trigger sync_dump().
+	 */
+	if (katom->fence)
+		sync_fence_wait(katom->fence, 1);
+}
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_common.c b/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
new file mode 100644
index 0000000000000..5239daee409e2
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
@@ -0,0 +1,49 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * @file mali_kbase_sync_common.c
+ *
+ * Common code for our explicit fence functionality
+ */
+
+#include <linux/workqueue.h>
+#include "mali_kbase.h"
+#include "mali_kbase_sync.h"
+
+void kbase_sync_fence_wait_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kbase_soft_event_wait_callback(katom);
+}
+
+const char *kbase_sync_status_string(int status)
+{
+	if (status == 0)
+		return "signaled";
+	else if (status > 0)
+		return "active";
+	else
+		return "error";
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_file.c b/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
new file mode 100644
index 0000000000000..bb94aee3ccba7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
@@ -0,0 +1,361 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Code for supporting explicit Linux fences (CONFIG_SYNC_FILE)
+ * Introduced in kernel 4.9.
+ * Android explicit fences (CONFIG_SYNC) can be used for older kernels
+ * (see mali_kbase_sync_android.c)
+ */
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/sync_file.h>
+#include <linux/slab.h>
+#include "mali_kbase_fence_defs.h"
+#include "mali_kbase_sync.h"
+#include "mali_kbase_fence.h"
+#include "mali_kbase.h"
+
+static const struct file_operations stream_fops = {
+	.owner = THIS_MODULE
+};
+
+int kbase_sync_fence_stream_create(const char *name, int *const out_fd)
+{
+	if (!out_fd)
+		return -EINVAL;
+
+	*out_fd = anon_inode_getfd(name, &stream_fops, NULL,
+				   O_RDONLY | O_CLOEXEC);
+	if (*out_fd < 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+	struct sync_file *sync_file;
+	int fd;
+
+	fence = kbase_fence_out_new(katom);
+	if (!fence)
+		return -ENOMEM;
+
+#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE)
+	/* Take an extra reference to the fence on behalf of the sync_file.
+	 * This is only needed on older kernels where sync_file_create()
+	 * does not take its own reference. This was changed in v4.9.68,
+	 * where sync_file_create() now takes its own reference.
+	 */
+	dma_fence_get(fence);
+#endif
+
+	/* create a sync_file fd representing the fence */
+	sync_file = sync_file_create(fence);
+	if (!sync_file) {
+		dma_fence_put(fence);
+		kbase_fence_out_remove(katom);
+		return -ENOMEM;
+	}
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0) {
+		fput(sync_file->file);
+		kbase_fence_out_remove(katom);
+		return fd;
+	}
+
+	fd_install(fd, sync_file->file);
+
+	return fd;
+}
+
+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence = sync_file_get_fence(fd);
+#else
+	struct dma_fence *fence = sync_file_get_fence(fd);
+#endif
+
+	if (!fence)
+		return -ENOENT;
+
+	kbase_fence_fence_in_set(katom, fence);
+
+	return 0;
+}
+
+int kbase_sync_fence_validate(int fd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence = sync_file_get_fence(fd);
+#else
+	struct dma_fence *fence = sync_file_get_fence(fd);
+#endif
+
+	if (!fence)
+		return -EINVAL;
+
+	dma_fence_put(fence);
+
+	return 0; /* valid */
+}
+
+enum base_jd_event_code
+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result)
+{
+	int res;
+
+	if (!kbase_fence_out_is_ours(katom)) {
+		/* Not our fence */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	res = kbase_fence_out_signal(katom, result);
+	if (unlikely(res < 0)) {
+		dev_warn(katom->kctx->kbdev->dev,
+				"fence_signal() failed with %d\n", res);
+	}
+
+	kbase_sync_fence_out_remove(katom);
+
+	return (result != 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+static void kbase_fence_wait_callback(struct fence *fence,
+				      struct fence_cb *cb)
+#else
+static void kbase_fence_wait_callback(struct dma_fence *fence,
+				      struct dma_fence_cb *cb)
+#endif
+{
+	struct kbase_fence_cb *kcb = container_of(cb,
+				struct kbase_fence_cb,
+				fence_cb);
+	struct kbase_jd_atom *katom = kcb->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Cancel atom if fence is erroneous */
+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \
+	 (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
+	  KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE))
+	if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error)
+#else
+	if (dma_fence_is_signaled(kcb->fence) && kcb->fence->status < 0)
+#endif
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	if (kbase_fence_dep_count_dec_and_test(katom)) {
+		/* We take responsibility of handling this */
+		kbase_fence_dep_count_set(katom, -1);
+
+		/* To prevent a potential deadlock we schedule the work onto the
+		 * job_done_wq workqueue
+		 *
+		 * The issue is that we may signal the timeline while holding
+		 * kctx->jctx.lock and the callbacks are run synchronously from
+		 * sync_timeline_signal. So we simply defer the work.
+		 */
+		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+		queue_work(kctx->jctx.job_done_wq, &katom->work);
+	}
+}
+
+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom)
+{
+	int err;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	fence = kbase_fence_in_get(katom);
+	if (!fence)
+		return 0; /* no input fence to wait for, good to go! */
+
+	kbase_fence_dep_count_set(katom, 1);
+
+	err = kbase_fence_add_callback(katom, fence, kbase_fence_wait_callback);
+
+	kbase_fence_put(fence);
+
+	if (likely(!err)) {
+		/* Test if the callbacks are already triggered */
+		if (kbase_fence_dep_count_dec_and_test(katom)) {
+			kbase_fence_free_callbacks(katom);
+			kbase_fence_dep_count_set(katom, -1);
+			return 0; /* Already signaled, good to go right now */
+		}
+
+		/* Callback installed, so we just need to wait for it... */
+	} else {
+		/* Failure */
+		kbase_fence_free_callbacks(katom);
+		kbase_fence_dep_count_set(katom, -1);
+
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+		/* We should cause the dependent jobs in the bag to be failed,
+		 * to do this we schedule the work queue to complete this job */
+
+		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+	return 1; /* completion to be done later by callback/worker */
+}
+
+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
+{
+	if (!kbase_fence_free_callbacks(katom)) {
+		/* The wait wasn't cancelled -
+		 * leave the cleanup for kbase_fence_wait_callback */
+		return;
+	}
+
+	/* Take responsibility of completion */
+	kbase_fence_dep_count_set(katom, -1);
+
+	/* Wait was cancelled - zap the atoms */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom)
+{
+	kbase_fence_out_remove(katom);
+}
+
+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom)
+{
+	kbase_fence_free_callbacks(katom);
+	kbase_fence_in_remove(katom);
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+static void kbase_sync_fence_info_get(struct fence *fence,
+				      struct kbase_sync_fence_info *info)
+#else
+static void kbase_sync_fence_info_get(struct dma_fence *fence,
+				      struct kbase_sync_fence_info *info)
+#endif
+{
+	info->fence = fence;
+
+	/* translate into CONFIG_SYNC status:
+	 * < 0 : error
+	 * 0 : active
+	 * 1 : signaled
+	 */
+	if (dma_fence_is_signaled(fence)) {
+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \
+	 (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
+	  KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE))
+		int status = fence->error;
+#else
+		int status = fence->status;
+#endif
+		if (status < 0)
+			info->status = status; /* signaled with error */
+		else
+			info->status = 1; /* signaled with success */
+	} else  {
+		info->status = 0; /* still active (unsignaled) */
+	}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+	scnprintf(info->name, sizeof(info->name), "%u#%u",
+		  fence->context, fence->seqno);
+#else
+	scnprintf(info->name, sizeof(info->name), "%llu#%u",
+		  fence->context, fence->seqno);
+#endif
+}
+
+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	fence = kbase_fence_in_get(katom);
+	if (!fence)
+		return -ENOENT;
+
+	kbase_sync_fence_info_get(fence, info);
+
+	kbase_fence_put(fence);
+
+	return 0;
+}
+
+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
+				  struct kbase_sync_fence_info *info)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	fence = kbase_fence_out_get(katom);
+	if (!fence)
+		return -ENOENT;
+
+	kbase_sync_fence_info_get(fence, info);
+
+	kbase_fence_put(fence);
+
+	return 0;
+}
+
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom)
+{
+	/* Not implemented */
+}
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
new file mode 100644
index 0000000000000..2ff45f50bf16d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
@@ -0,0 +1,2638 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_tlstream.h>
+
+/*****************************************************************************/
+
+/* The version of swtrace protocol used in timeline stream. */
+#define SWTRACE_VERSION    3
+
+/* The maximum expected length of string in tracepoint descriptor. */
+#define STRLEN_MAX         64 /* bytes */
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC       1000000000ull /* ns */
+
+/* The period of autoflush checker execution in milliseconds. */
+#define AUTOFLUSH_INTERVAL 1000 /* ms */
+
+/* The maximum size of a single packet used by timeline. */
+#define PACKET_SIZE        4096 /* bytes */
+
+/* The number of packets used by one timeline stream. */
+#define PACKET_COUNT       16
+
+/* The number of bytes reserved for packet header.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_HEADER_SIZE 8 /* bytes */
+
+/* The number of bytes reserved for packet sequence number.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_NUMBER_SIZE 4 /* bytes */
+
+/* Packet header - first word.
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_STREAMID_POS  0
+#define PACKET_STREAMID_LEN  8
+#define PACKET_RSVD1_POS     (PACKET_STREAMID_POS + PACKET_STREAMID_LEN)
+#define PACKET_RSVD1_LEN     8
+#define PACKET_TYPE_POS      (PACKET_RSVD1_POS + PACKET_RSVD1_LEN)
+#define PACKET_TYPE_LEN      3
+#define PACKET_CLASS_POS     (PACKET_TYPE_POS + PACKET_TYPE_LEN)
+#define PACKET_CLASS_LEN     7
+#define PACKET_FAMILY_POS    (PACKET_CLASS_POS + PACKET_CLASS_LEN)
+#define PACKET_FAMILY_LEN    6
+
+/* Packet header - second word
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_LENGTH_POS    0
+#define PACKET_LENGTH_LEN    24
+#define PACKET_SEQBIT_POS    (PACKET_LENGTH_POS + PACKET_LENGTH_LEN)
+#define PACKET_SEQBIT_LEN    1
+#define PACKET_RSVD2_POS     (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN)
+#define PACKET_RSVD2_LEN     7
+
+/* Types of streams generated by timeline.
+ * Order is significant! Header streams must precede respective body streams. */
+enum tl_stream_type {
+	TL_STREAM_TYPE_OBJ_HEADER,
+	TL_STREAM_TYPE_OBJ_SUMMARY,
+	TL_STREAM_TYPE_OBJ,
+	TL_STREAM_TYPE_AUX_HEADER,
+	TL_STREAM_TYPE_AUX,
+
+	TL_STREAM_TYPE_COUNT
+};
+
+/* Timeline packet family ids.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_family {
+	TL_PACKET_FAMILY_CTRL = 0, /* control packets */
+	TL_PACKET_FAMILY_TL   = 1, /* timeline packets */
+
+	TL_PACKET_FAMILY_COUNT
+};
+
+/* Packet classes used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_class {
+	TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */
+	TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */
+};
+
+/* Packet types used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_type {
+	TL_PACKET_TYPE_HEADER  = 0, /* stream's header/directory */
+	TL_PACKET_TYPE_BODY    = 1, /* stream's body */
+	TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */
+};
+
+/* Message ids of trace events that are recorded in the timeline stream. */
+enum tl_msg_id_obj {
+	/* Timeline object events. */
+	KBASE_TL_NEW_CTX,
+	KBASE_TL_NEW_GPU,
+	KBASE_TL_NEW_LPU,
+	KBASE_TL_NEW_ATOM,
+	KBASE_TL_NEW_AS,
+	KBASE_TL_DEL_CTX,
+	KBASE_TL_DEL_ATOM,
+	KBASE_TL_LIFELINK_LPU_GPU,
+	KBASE_TL_LIFELINK_AS_GPU,
+	KBASE_TL_RET_CTX_LPU,
+	KBASE_TL_RET_ATOM_CTX,
+	KBASE_TL_RET_ATOM_LPU,
+	KBASE_TL_NRET_CTX_LPU,
+	KBASE_TL_NRET_ATOM_CTX,
+	KBASE_TL_NRET_ATOM_LPU,
+	KBASE_TL_RET_AS_CTX,
+	KBASE_TL_NRET_AS_CTX,
+	KBASE_TL_RET_ATOM_AS,
+	KBASE_TL_NRET_ATOM_AS,
+	KBASE_TL_DEP_ATOM_ATOM,
+	KBASE_TL_NDEP_ATOM_ATOM,
+	KBASE_TL_RDEP_ATOM_ATOM,
+	KBASE_TL_ATTRIB_ATOM_CONFIG,
+	KBASE_TL_ATTRIB_ATOM_PRIORITY,
+	KBASE_TL_ATTRIB_ATOM_STATE,
+	KBASE_TL_ATTRIB_ATOM_PRIORITIZED,
+	KBASE_TL_ATTRIB_ATOM_JIT,
+	KBASE_TL_ATTRIB_AS_CONFIG,
+	KBASE_TL_EVENT_LPU_SOFTSTOP,
+	KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
+	KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
+	KBASE_TL_EVENT_ATOM_SOFTJOB_START,
+	KBASE_TL_EVENT_ATOM_SOFTJOB_END,
+
+	/* Job dump specific events. */
+	KBASE_JD_GPU_SOFT_RESET
+};
+
+/* Message ids of trace events that are recorded in the auxiliary stream. */
+enum tl_msg_id_aux {
+	KBASE_AUX_PM_STATE,
+	KBASE_AUX_PAGEFAULT,
+	KBASE_AUX_PAGESALLOC,
+	KBASE_AUX_DEVFREQ_TARGET,
+	KBASE_AUX_PROTECTED_ENTER_START,
+	KBASE_AUX_PROTECTED_ENTER_END,
+	KBASE_AUX_PROTECTED_LEAVE_START,
+	KBASE_AUX_PROTECTED_LEAVE_END
+};
+
+/*****************************************************************************/
+
+/**
+ * struct tl_stream - timeline stream structure
+ * @lock: message order lock
+ * @buffer: array of buffers
+ * @wbi: write buffer index
+ * @rbi: read buffer index
+ * @numbered: if non-zero stream's packets are sequentially numbered
+ * @autoflush_counter: counter tracking stream's autoflush state
+ *
+ * This structure holds information needed to construct proper packets in the
+ * timeline stream. Each message in sequence must bear timestamp that is greater
+ * to one in previous message in the same stream. For this reason lock is held
+ * throughout the process of message creation. Each stream contains set of
+ * buffers. Each buffer will hold one MIPE packet. In case there is no free
+ * space required to store incoming message the oldest buffer is discarded.
+ * Each packet in timeline body stream has sequence number embedded (this value
+ * must increment monotonically and is used by packets receiver to discover
+ * buffer overflows.
+ * Autoflush counter is set to negative number when there is no data pending
+ * for flush and it is set to zero on every update of the buffer. Autoflush
+ * timer will increment the counter by one on every expiry. In case there will
+ * be no activity on the buffer during two consecutive timer expiries, stream
+ * buffer will be flushed.
+ */
+struct tl_stream {
+	spinlock_t lock;
+
+	struct {
+		atomic_t size;              /* number of bytes in buffer */
+		char     data[PACKET_SIZE]; /* buffer's data */
+	} buffer[PACKET_COUNT];
+
+	atomic_t wbi;
+	atomic_t rbi;
+
+	int      numbered;
+	atomic_t autoflush_counter;
+};
+
+/**
+ * struct tp_desc - tracepoint message descriptor structure
+ * @id:        tracepoint ID identifying message in stream
+ * @id_str:    human readable version of tracepoint ID
+ * @name:      tracepoint description
+ * @arg_types: tracepoint's arguments types declaration
+ * @arg_names: comma separated list of tracepoint's arguments names
+ */
+struct tp_desc {
+	u32        id;
+	const char *id_str;
+	const char *name;
+	const char *arg_types;
+	const char *arg_names;
+};
+
+/*****************************************************************************/
+
+/* Configuration of timeline streams generated by kernel.
+ * Kernel emit only streams containing either timeline object events or
+ * auxiliary events. All streams have stream id value of 1 (as opposed to user
+ * space streams that have value of 0). */
+static const struct {
+	enum tl_packet_family pkt_family;
+	enum tl_packet_class  pkt_class;
+	enum tl_packet_type   pkt_type;
+	unsigned int          stream_id;
+} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = {
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY,    1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_HEADER,  1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY,    1}
+};
+
+/* The timeline streams generated by kernel. */
+static struct tl_stream *tl_stream[TL_STREAM_TYPE_COUNT];
+
+/* Autoflush timer. */
+static struct timer_list autoflush_timer;
+
+/* If non-zero autoflush timer is active. */
+static atomic_t autoflush_timer_active;
+
+/* Reader lock. Only one reader is allowed to have access to the timeline
+ * streams at any given time. */
+static DEFINE_MUTEX(tl_reader_lock);
+
+/* Timeline stream event queue. */
+static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue);
+
+/* The timeline stream file operations functions. */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos);
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait);
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations kbasep_tlstream_fops = {
+	.release = kbasep_tlstream_release,
+	.read    = kbasep_tlstream_read,
+	.poll    = kbasep_tlstream_poll,
+};
+
+/* Descriptors of timeline messages transmitted in object events stream. */
+static const struct tp_desc tp_desc_obj[] = {
+	{
+		KBASE_TL_NEW_CTX,
+		__stringify(KBASE_TL_NEW_CTX),
+		"object ctx is created",
+		"@pII",
+		"ctx,ctx_nr,tgid"
+	},
+	{
+		KBASE_TL_NEW_GPU,
+		__stringify(KBASE_TL_NEW_GPU),
+		"object gpu is created",
+		"@pII",
+		"gpu,gpu_id,core_count"
+	},
+	{
+		KBASE_TL_NEW_LPU,
+		__stringify(KBASE_TL_NEW_LPU),
+		"object lpu is created",
+		"@pII",
+		"lpu,lpu_nr,lpu_fn"
+	},
+	{
+		KBASE_TL_NEW_ATOM,
+		__stringify(KBASE_TL_NEW_ATOM),
+		"object atom is created",
+		"@pI",
+		"atom,atom_nr"
+	},
+	{
+		KBASE_TL_NEW_AS,
+		__stringify(KBASE_TL_NEW_AS),
+		"address space object is created",
+		"@pI",
+		"address_space,as_nr"
+	},
+	{
+		KBASE_TL_DEL_CTX,
+		__stringify(KBASE_TL_DEL_CTX),
+		"context is destroyed",
+		"@p",
+		"ctx"
+	},
+	{
+		KBASE_TL_DEL_ATOM,
+		__stringify(KBASE_TL_DEL_ATOM),
+		"atom is destroyed",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_LIFELINK_LPU_GPU,
+		__stringify(KBASE_TL_LIFELINK_LPU_GPU),
+		"lpu is deleted with gpu",
+		"@pp",
+		"lpu,gpu"
+	},
+	{
+		KBASE_TL_LIFELINK_AS_GPU,
+		__stringify(KBASE_TL_LIFELINK_AS_GPU),
+		"address space is deleted with gpu",
+		"@pp",
+		"address_space,gpu"
+	},
+	{
+		KBASE_TL_RET_CTX_LPU,
+		__stringify(KBASE_TL_RET_CTX_LPU),
+		"context is retained by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_RET_ATOM_CTX,
+		__stringify(KBASE_TL_RET_ATOM_CTX),
+		"atom is retained by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_LPU,
+		__stringify(KBASE_TL_RET_ATOM_LPU),
+		"atom is retained by lpu",
+		"@pps",
+		"atom,lpu,attrib_match_list"
+	},
+	{
+		KBASE_TL_NRET_CTX_LPU,
+		__stringify(KBASE_TL_NRET_CTX_LPU),
+		"context is released by lpu",
+		"@pp",
+		"ctx,lpu"
+	},
+	{
+		KBASE_TL_NRET_ATOM_CTX,
+		__stringify(KBASE_TL_NRET_ATOM_CTX),
+		"atom is released by context",
+		"@pp",
+		"atom,ctx"
+	},
+	{
+		KBASE_TL_NRET_ATOM_LPU,
+		__stringify(KBASE_TL_NRET_ATOM_LPU),
+		"atom is released by lpu",
+		"@pp",
+		"atom,lpu"
+	},
+	{
+		KBASE_TL_RET_AS_CTX,
+		__stringify(KBASE_TL_RET_AS_CTX),
+		"address space is retained by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_NRET_AS_CTX,
+		__stringify(KBASE_TL_NRET_AS_CTX),
+		"address space is released by context",
+		"@pp",
+		"address_space,ctx"
+	},
+	{
+		KBASE_TL_RET_ATOM_AS,
+		__stringify(KBASE_TL_RET_ATOM_AS),
+		"atom is retained by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_NRET_ATOM_AS,
+		__stringify(KBASE_TL_NRET_ATOM_AS),
+		"atom is released by address space",
+		"@pp",
+		"atom,address_space"
+	},
+	{
+		KBASE_TL_DEP_ATOM_ATOM,
+		__stringify(KBASE_TL_DEP_ATOM_ATOM),
+		"atom2 depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_NDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_NDEP_ATOM_ATOM),
+		"atom2 no longer depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_RDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_RDEP_ATOM_ATOM),
+		"resolved dependecy of atom2 depending on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_ATOM_CONFIG),
+		"atom job slot attributes",
+		"@pLLI",
+		"atom,descriptor,affinity,config"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_PRIORITY,
+		__stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY),
+		"atom priority",
+		"@pI",
+		"atom,prio"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_STATE,
+		__stringify(KBASE_TL_ATTRIB_ATOM_STATE),
+		"atom state",
+		"@pI",
+		"atom,state"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_PRIORITIZED,
+		__stringify(KBASE_TL_ATTRIB_ATOM_PRIORITIZED),
+		"atom caused priority change",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_ATTRIB_ATOM_JIT,
+		__stringify(KBASE_TL_ATTRIB_ATOM_JIT),
+		"jit done for atom",
+		"@pLL",
+		"atom,edit_addr,new_addr"
+	},
+	{
+		KBASE_TL_ATTRIB_AS_CONFIG,
+		__stringify(KBASE_TL_ATTRIB_AS_CONFIG),
+		"address space attributes",
+		"@pLLL",
+		"address_space,transtab,memattr,transcfg"
+	},
+	{
+		KBASE_TL_EVENT_LPU_SOFTSTOP,
+		__stringify(KBASE_TL_EVENT_LPU_SOFTSTOP),
+		"softstop event on given lpu",
+		"@p",
+		"lpu"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
+		__stringify(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX),
+		"atom softstopped",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
+		__stringify(KBASE_TL_EVENT_SOFTSTOP_ISSUE),
+		"atom softstop issued",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTJOB_START,
+		__stringify(KBASE_TL_EVENT_ATOM_SOFTJOB_START),
+		"atom soft job has started",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_TL_EVENT_ATOM_SOFTJOB_END,
+		__stringify(KBASE_TL_EVENT_ATOM_SOFTJOB_END),
+		"atom soft job has completed",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_JD_GPU_SOFT_RESET,
+		__stringify(KBASE_JD_GPU_SOFT_RESET),
+		"gpu soft reset",
+		"@p",
+		"gpu"
+	},
+};
+
+/* Descriptors of timeline messages transmitted in auxiliary events stream. */
+static const struct tp_desc tp_desc_aux[] = {
+	{
+		KBASE_AUX_PM_STATE,
+		__stringify(KBASE_AUX_PM_STATE),
+		"PM state",
+		"@IL",
+		"core_type,core_state_bitset"
+	},
+	{
+		KBASE_AUX_PAGEFAULT,
+		__stringify(KBASE_AUX_PAGEFAULT),
+		"Page fault",
+		"@IL",
+		"ctx_nr,page_cnt_change"
+	},
+	{
+		KBASE_AUX_PAGESALLOC,
+		__stringify(KBASE_AUX_PAGESALLOC),
+		"Total alloc pages change",
+		"@IL",
+		"ctx_nr,page_cnt"
+	},
+	{
+		KBASE_AUX_DEVFREQ_TARGET,
+		__stringify(KBASE_AUX_DEVFREQ_TARGET),
+		"New device frequency target",
+		"@L",
+		"target_freq"
+	},
+	{
+		KBASE_AUX_PROTECTED_ENTER_START,
+		__stringify(KBASE_AUX_PROTECTED_ENTER_START),
+		"enter protected mode start",
+		"@p",
+		"gpu"
+	},
+	{
+		KBASE_AUX_PROTECTED_ENTER_END,
+		__stringify(KBASE_AUX_PROTECTED_ENTER_END),
+		"enter protected mode end",
+		"@p",
+		"gpu"
+	},
+	{
+		KBASE_AUX_PROTECTED_LEAVE_START,
+		__stringify(KBASE_AUX_PROTECTED_LEAVE_START),
+		"leave protected mode start",
+		"@p",
+		"gpu"
+	},
+	{
+		KBASE_AUX_PROTECTED_LEAVE_END,
+		__stringify(KBASE_AUX_PROTECTED_LEAVE_END),
+		"leave protected mode end",
+		"@p",
+		"gpu"
+	}
+};
+
+#if MALI_UNIT_TEST
+/* Number of bytes read by user. */
+static atomic_t tlstream_bytes_collected = {0};
+
+/* Number of bytes generated by tracepoint messages. */
+static atomic_t tlstream_bytes_generated = {0};
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+/* Indicator of whether the timeline stream file descriptor is used. */
+atomic_t kbase_tlstream_enabled = {0};
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ * Return: timestamp value
+ */
+static u64 kbasep_tlstream_get_timestamp(void)
+{
+	struct timespec ts;
+	u64             timestamp;
+
+	getrawmonotonic(&ts);
+	timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+	return timestamp;
+}
+
+/**
+ * kbasep_tlstream_write_bytes - write data to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ * @bytes:  pointer to buffer holding data
+ * @len:    length of data to be written
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_bytes(
+		char       *buffer,
+		size_t     pos,
+		const void *bytes,
+		size_t     len)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(bytes);
+
+	memcpy(&buffer[pos], bytes, len);
+
+	return pos + len;
+}
+
+/**
+ * kbasep_tlstream_write_string - write string to message buffer
+ * @buffer:         buffer where data will be written
+ * @pos:            position in the buffer where to place data
+ * @string:         pointer to buffer holding the source string
+ * @max_write_size: number of bytes that can be stored in buffer
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_string(
+		char       *buffer,
+		size_t     pos,
+		const char *string,
+		size_t     max_write_size)
+{
+	u32 string_len;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(string);
+	/* Timeline string consists of at least string length and nul
+	 * terminator. */
+	KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char));
+	max_write_size -= sizeof(string_len);
+
+	string_len = strlcpy(
+			&buffer[pos + sizeof(string_len)],
+			string,
+			max_write_size);
+	string_len += sizeof(char);
+
+	/* Make sure that the source string fit into the buffer. */
+	KBASE_DEBUG_ASSERT(string_len <= max_write_size);
+
+	/* Update string length. */
+	memcpy(&buffer[pos], &string_len, sizeof(string_len));
+
+	return pos + sizeof(string_len) + string_len;
+}
+
+/**
+ * kbasep_tlstream_write_timestamp - write timestamp to message buffer
+ * @buffer: buffer where data will be written
+ * @pos:    position in the buffer where to place data
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_timestamp(void *buffer, size_t pos)
+{
+	u64 timestamp = kbasep_tlstream_get_timestamp();
+
+	return kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&timestamp, sizeof(timestamp));
+}
+
+/**
+ * kbasep_tlstream_put_bits - put bits in a word
+ * @word:   pointer to the words being modified
+ * @value:  value that shall be written to given position
+ * @bitpos: position where value shall be written (in bits)
+ * @bitlen: length of value (in bits)
+ */
+static void kbasep_tlstream_put_bits(
+		u32          *word,
+		u32          value,
+		unsigned int bitpos,
+		unsigned int bitlen)
+{
+	const u32 mask = ((1 << bitlen) - 1) << bitpos;
+
+	KBASE_DEBUG_ASSERT(word);
+	KBASE_DEBUG_ASSERT((0 != bitlen) && (32 >= bitlen));
+	KBASE_DEBUG_ASSERT((bitpos + bitlen) <= 32);
+
+	*word &= ~mask;
+	*word |= ((value << bitpos) & mask);
+}
+
+/**
+ * kbasep_tlstream_packet_header_setup - setup the packet header
+ * @buffer:     pointer to the buffer
+ * @pkt_family: packet's family
+ * @pkt_type:   packet's type
+ * @pkt_class:  packet's class
+ * @stream_id:  stream id
+ * @numbered:   non-zero if this stream is numbered
+ *
+ * Function sets up immutable part of packet header in the given buffer.
+ */
+static void kbasep_tlstream_packet_header_setup(
+		char                  *buffer,
+		enum tl_packet_family pkt_family,
+		enum tl_packet_class  pkt_class,
+		enum tl_packet_type   pkt_type,
+		unsigned int          stream_id,
+		int                   numbered)
+{
+	u32 word0 = 0;
+	u32 word1 = 0;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(pkt_family == TL_PACKET_FAMILY_TL);
+	KBASE_DEBUG_ASSERT(
+			(pkt_type == TL_PACKET_TYPE_HEADER)  ||
+			(pkt_type == TL_PACKET_TYPE_SUMMARY) ||
+			(pkt_type == TL_PACKET_TYPE_BODY));
+	KBASE_DEBUG_ASSERT(
+			(pkt_class == TL_PACKET_CLASS_OBJ) ||
+			(pkt_class == TL_PACKET_CLASS_AUX));
+
+	kbasep_tlstream_put_bits(
+			&word0, pkt_family,
+			PACKET_FAMILY_POS, PACKET_FAMILY_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_class,
+			PACKET_CLASS_POS, PACKET_CLASS_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, pkt_type,
+			PACKET_TYPE_POS, PACKET_TYPE_LEN);
+	kbasep_tlstream_put_bits(
+			&word0, stream_id,
+			PACKET_STREAMID_POS, PACKET_STREAMID_LEN);
+
+	if (numbered)
+		kbasep_tlstream_put_bits(
+				&word1, 1,
+				PACKET_SEQBIT_POS, PACKET_SEQBIT_LEN);
+
+	memcpy(&buffer[0],             &word0, sizeof(word0));
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_header_update - update the packet header
+ * @buffer:    pointer to the buffer
+ * @data_size: amount of data carried in this packet
+ *
+ * Function updates mutable part of packet header in the given buffer.
+ * Note that value of data_size must not including size of the header.
+ */
+static void kbasep_tlstream_packet_header_update(
+		char   *buffer,
+		size_t data_size)
+{
+	u32 word0;
+	u32 word1;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	CSTD_UNUSED(word0);
+
+	memcpy(&word1, &buffer[sizeof(word0)], sizeof(word1));
+
+	kbasep_tlstream_put_bits(
+			&word1, data_size,
+			PACKET_LENGTH_POS, PACKET_LENGTH_LEN);
+
+	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_number_update - update the packet number
+ * @buffer:  pointer to the buffer
+ * @counter: value of packet counter for this packet's stream
+ *
+ * Function updates packet number embedded within the packet placed in the
+ * given buffer.
+ */
+static void kbasep_tlstream_packet_number_update(char *buffer, u32 counter)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+
+	memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter));
+}
+
+/**
+ * kbasep_timeline_stream_reset - reset stream
+ * @stream:  pointer to the stream structure
+ *
+ * Function discards all pending messages and resets packet counters.
+ */
+static void kbasep_timeline_stream_reset(struct tl_stream *stream)
+{
+	unsigned int i;
+
+	for (i = 0; i < PACKET_COUNT; i++) {
+		if (stream->numbered)
+			atomic_set(
+					&stream->buffer[i].size,
+					PACKET_HEADER_SIZE +
+					PACKET_NUMBER_SIZE);
+		else
+			atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE);
+	}
+
+	atomic_set(&stream->wbi, 0);
+	atomic_set(&stream->rbi, 0);
+}
+
+/**
+ * kbasep_timeline_stream_init - initialize timeline stream
+ * @stream:      pointer to the stream structure
+ * @stream_type: stream type
+ */
+static void kbasep_timeline_stream_init(
+		struct tl_stream    *stream,
+		enum tl_stream_type stream_type)
+{
+	unsigned int i;
+
+	KBASE_DEBUG_ASSERT(stream);
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	spin_lock_init(&stream->lock);
+
+	/* All packets carrying tracepoints shall be numbered. */
+	if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type)
+		stream->numbered = 1;
+	else
+		stream->numbered = 0;
+
+	for (i = 0; i < PACKET_COUNT; i++)
+		kbasep_tlstream_packet_header_setup(
+				stream->buffer[i].data,
+				tl_stream_cfg[stream_type].pkt_family,
+				tl_stream_cfg[stream_type].pkt_class,
+				tl_stream_cfg[stream_type].pkt_type,
+				tl_stream_cfg[stream_type].stream_id,
+				stream->numbered);
+
+	kbasep_timeline_stream_reset(tl_stream[stream_type]);
+}
+
+/**
+ * kbasep_timeline_stream_term - terminate timeline stream
+ * @stream: pointer to the stream structure
+ */
+static void kbasep_timeline_stream_term(struct tl_stream *stream)
+{
+	KBASE_DEBUG_ASSERT(stream);
+}
+
+/**
+ * kbasep_tlstream_msgbuf_submit - submit packet to the user space
+ * @stream:     pointer to the stream structure
+ * @wb_idx_raw: write buffer index
+ * @wb_size:    length of data stored in current buffer
+ *
+ * Function updates currently written buffer with packet header. Then write
+ * index is incremented and buffer is handled to user space. Parameters
+ * of new buffer are returned using provided arguments.
+ *
+ * Return: length of data in new buffer
+ *
+ * Warning:  User must update the stream structure with returned value.
+ */
+static size_t kbasep_tlstream_msgbuf_submit(
+		struct tl_stream *stream,
+		unsigned int      wb_idx_raw,
+		unsigned int      wb_size)
+{
+	unsigned int rb_idx_raw = atomic_read(&stream->rbi);
+	unsigned int wb_idx = wb_idx_raw % PACKET_COUNT;
+
+	/* Set stream as flushed. */
+	atomic_set(&stream->autoflush_counter, -1);
+
+	kbasep_tlstream_packet_header_update(
+			stream->buffer[wb_idx].data,
+			wb_size - PACKET_HEADER_SIZE);
+
+	if (stream->numbered)
+		kbasep_tlstream_packet_number_update(
+				stream->buffer[wb_idx].data,
+				wb_idx_raw);
+
+	/* Increasing write buffer index will expose this packet to the reader.
+	 * As stream->lock is not taken on reader side we must make sure memory
+	 * is updated correctly before this will happen. */
+	smp_wmb();
+	wb_idx_raw++;
+	atomic_set(&stream->wbi, wb_idx_raw);
+
+	/* Inform user that packets are ready for reading. */
+	wake_up_interruptible(&tl_event_queue);
+
+	/* Detect and mark overflow in this stream. */
+	if (PACKET_COUNT == wb_idx_raw - rb_idx_raw) {
+		/* Reader side depends on this increment to correctly handle
+		 * overflows. The value shall be updated only if it was not
+		 * modified by the reader. The data holding buffer will not be
+		 * updated before stream->lock is released, however size of the
+		 * buffer will. Make sure this increment is globally visible
+		 * before information about selected write buffer size. */
+		atomic_cmpxchg(&stream->rbi, rb_idx_raw, rb_idx_raw + 1);
+	}
+
+	wb_size = PACKET_HEADER_SIZE;
+	if (stream->numbered)
+		wb_size += PACKET_NUMBER_SIZE;
+
+	return wb_size;
+}
+
+/**
+ * kbasep_tlstream_msgbuf_acquire - lock selected stream and reserves buffer
+ * @stream_type: type of the stream that shall be locked
+ * @msg_size:    message size
+ * @flags:       pointer to store flags passed back on stream release
+ *
+ * Function will lock the stream and reserve the number of bytes requested
+ * in msg_size for the user.
+ *
+ * Return: pointer to the buffer where message can be stored
+ *
+ * Warning: Stream must be released with kbasep_tlstream_msgbuf_release().
+ *          Only atomic operations are allowed while stream is locked
+ *          (i.e. do not use any operation that may sleep).
+ */
+static char *kbasep_tlstream_msgbuf_acquire(
+		enum tl_stream_type stream_type,
+		size_t              msg_size,
+		unsigned long       *flags) __acquires(&stream->lock)
+{
+	struct tl_stream *stream;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(
+			PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >=
+			msg_size);
+
+	stream = tl_stream[stream_type];
+
+	spin_lock_irqsave(&stream->lock, *flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	/* Select next buffer if data will not fit into current one. */
+	if (PACKET_SIZE < wb_size + msg_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx  = (wb_idx_raw + 1) % PACKET_COUNT;
+	}
+
+	/* Reserve space in selected buffer. */
+	atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size);
+
+#if MALI_UNIT_TEST
+	atomic_add(msg_size, &tlstream_bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+	return &stream->buffer[wb_idx].data[wb_size];
+}
+
+/**
+ * kbasep_tlstream_msgbuf_release - unlock selected stream
+ * @stream_type:  type of the stream that shall be locked
+ * @flags:        value obtained during stream acquire
+ *
+ * Function releases stream that has been previously locked with a call to
+ * kbasep_tlstream_msgbuf_acquire().
+ */
+static void kbasep_tlstream_msgbuf_release(
+		enum tl_stream_type stream_type,
+		unsigned long       flags) __releases(&stream->lock)
+{
+	struct tl_stream *stream;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+	stream = tl_stream[stream_type];
+
+	/* Mark stream as containing unflushed data. */
+	atomic_set(&stream->autoflush_counter, 0);
+
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_flush_stream - flush stream
+ * @stype:  type of stream to be flushed
+ *
+ * Flush pending data in timeline stream.
+ */
+static void kbasep_tlstream_flush_stream(enum tl_stream_type stype)
+{
+	struct tl_stream *stream = tl_stream[stype];
+	unsigned long    flags;
+	unsigned int     wb_idx_raw;
+	unsigned int     wb_idx;
+	size_t           wb_size;
+	size_t           min_size = PACKET_HEADER_SIZE;
+
+	if (stream->numbered)
+		min_size += PACKET_NUMBER_SIZE;
+
+	spin_lock_irqsave(&stream->lock, flags);
+
+	wb_idx_raw = atomic_read(&stream->wbi);
+	wb_idx     = wb_idx_raw % PACKET_COUNT;
+	wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+	if (wb_size > min_size) {
+		wb_size = kbasep_tlstream_msgbuf_submit(
+				stream, wb_idx_raw, wb_size);
+		wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+		atomic_set(&stream->buffer[wb_idx].size, wb_size);
+	}
+	spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/**
+ * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback
+ * @timer: unused
+ *
+ * Timer is executed periodically to check if any of the stream contains
+ * buffer ready to be submitted to user space.
+ */
+static void kbasep_tlstream_autoflush_timer_callback(struct timer_list *timer)
+{
+	enum tl_stream_type stype;
+	int                 rcode;
+
+	CSTD_UNUSED(timer);
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) {
+		struct tl_stream *stream = tl_stream[stype];
+		unsigned long    flags;
+		unsigned int     wb_idx_raw;
+		unsigned int     wb_idx;
+		size_t           wb_size;
+		size_t           min_size = PACKET_HEADER_SIZE;
+
+		int af_cnt = atomic_read(&stream->autoflush_counter);
+
+		/* Check if stream contain unflushed data. */
+		if (0 > af_cnt)
+			continue;
+
+		/* Check if stream should be flushed now. */
+		if (af_cnt != atomic_cmpxchg(
+					&stream->autoflush_counter,
+					af_cnt,
+					af_cnt + 1))
+			continue;
+		if (!af_cnt)
+			continue;
+
+		/* Autoflush this stream. */
+		if (stream->numbered)
+			min_size += PACKET_NUMBER_SIZE;
+
+		spin_lock_irqsave(&stream->lock, flags);
+
+		wb_idx_raw = atomic_read(&stream->wbi);
+		wb_idx     = wb_idx_raw % PACKET_COUNT;
+		wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+		if (wb_size > min_size) {
+			wb_size = kbasep_tlstream_msgbuf_submit(
+					stream, wb_idx_raw, wb_size);
+			wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+			atomic_set(&stream->buffer[wb_idx].size,
+					wb_size);
+		}
+		spin_unlock_irqrestore(&stream->lock, flags);
+	}
+
+	if (atomic_read(&autoflush_timer_active))
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+	CSTD_UNUSED(rcode);
+}
+
+/**
+ * kbasep_tlstream_packet_pending - check timeline streams for pending packets
+ * @stype:      pointer to variable where stream type will be placed
+ * @rb_idx_raw: pointer to variable where read buffer index will be placed
+ *
+ * Function checks all streams for pending packets. It will stop as soon as
+ * packet ready to be submitted to user space is detected. Variables under
+ * pointers, passed as the parameters to this function will be updated with
+ * values pointing to right stream and buffer.
+ *
+ * Return: non-zero if any of timeline streams has at last one packet ready
+ */
+static int kbasep_tlstream_packet_pending(
+		enum tl_stream_type *stype,
+		unsigned int        *rb_idx_raw)
+{
+	int pending = 0;
+
+	KBASE_DEBUG_ASSERT(stype);
+	KBASE_DEBUG_ASSERT(rb_idx_raw);
+
+	for (
+			*stype = 0;
+			(*stype < TL_STREAM_TYPE_COUNT) && !pending;
+			(*stype)++) {
+		if (NULL != tl_stream[*stype]) {
+			*rb_idx_raw = atomic_read(&tl_stream[*stype]->rbi);
+			/* Read buffer index may be updated by writer in case of
+			 * overflow. Read and write buffer indexes must be
+			 * loaded in correct order. */
+			smp_rmb();
+			if (atomic_read(&tl_stream[*stype]->wbi) != *rb_idx_raw)
+				pending = 1;
+		}
+	}
+	(*stype)--;
+
+	return pending;
+}
+
+/**
+ * kbasep_tlstream_read - copy data from streams to buffer provided by user
+ * @filp:   pointer to file structure (unused)
+ * @buffer: pointer to the buffer provided by user
+ * @size:   maximum amount of data that can be stored in the buffer
+ * @f_pos:  pointer to file offset (unused)
+ *
+ * Return: number of bytes stored in the buffer
+ */
+static ssize_t kbasep_tlstream_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos)
+{
+	ssize_t copy_len = 0;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(f_pos);
+
+	if (!buffer)
+		return -EINVAL;
+
+	if ((0 > *f_pos) || (PACKET_SIZE > size))
+		return -EINVAL;
+
+	mutex_lock(&tl_reader_lock);
+
+	while (copy_len < size) {
+		enum tl_stream_type stype;
+		unsigned int        rb_idx_raw = 0;
+		unsigned int        rb_idx;
+		size_t              rb_size;
+
+		/* If we don't have any data yet, wait for packet to be
+		 * submitted. If we already read some packets and there is no
+		 * packet pending return back to user. */
+		if (0 < copy_len) {
+			if (!kbasep_tlstream_packet_pending(
+						&stype,
+						&rb_idx_raw))
+				break;
+		} else {
+			if (wait_event_interruptible(
+						tl_event_queue,
+						kbasep_tlstream_packet_pending(
+							&stype,
+							&rb_idx_raw))) {
+				copy_len = -ERESTARTSYS;
+				break;
+			}
+		}
+
+		/* Check if this packet fits into the user buffer.
+		 * If so copy its content. */
+		rb_idx = rb_idx_raw % PACKET_COUNT;
+		rb_size = atomic_read(&tl_stream[stype]->buffer[rb_idx].size);
+		if (rb_size > size - copy_len)
+			break;
+		if (copy_to_user(
+					&buffer[copy_len],
+					tl_stream[stype]->buffer[rb_idx].data,
+					rb_size)) {
+			copy_len = -EFAULT;
+			break;
+		}
+
+		/* If the rbi still points to the packet we just processed
+		 * then there was no overflow so we add the copied size to
+		 * copy_len and move rbi on to the next packet
+		 */
+		smp_rmb();
+		if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) {
+			copy_len += rb_size;
+			atomic_inc(&tl_stream[stype]->rbi);
+
+#if MALI_UNIT_TEST
+			atomic_add(rb_size, &tlstream_bytes_collected);
+#endif /* MALI_UNIT_TEST */
+		}
+	}
+
+	mutex_unlock(&tl_reader_lock);
+
+	return copy_len;
+}
+
+/**
+ * kbasep_tlstream_poll - poll timeline stream for packets
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait)
+{
+	enum tl_stream_type stream_type;
+	unsigned int        rb_idx;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	poll_wait(filp, &tl_event_queue, wait);
+	if (kbasep_tlstream_packet_pending(&stream_type, &rb_idx))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_release - release timeline stream descriptor
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ *
+ * Return always return zero
+ */
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp)
+{
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+	CSTD_UNUSED(inode);
+	CSTD_UNUSED(filp);
+
+	/* Stop autoflush timer before releasing access to streams. */
+	atomic_set(&autoflush_timer_active, 0);
+	del_timer_sync(&autoflush_timer);
+
+	atomic_set(&kbase_tlstream_enabled, 0);
+	return 0;
+}
+
+/**
+ * kbasep_tlstream_timeline_header - prepare timeline header stream packet
+ * @stream_type: type of the stream that will carry header data
+ * @tp_desc:     pointer to array with tracepoint descriptors
+ * @tp_count:    number of descriptors in the given array
+ *
+ * Functions fills in information about tracepoints stored in body stream
+ * associated with this header stream.
+ */
+static void kbasep_tlstream_timeline_header(
+		enum tl_stream_type  stream_type,
+		const struct tp_desc *tp_desc,
+		u32                  tp_count)
+{
+	const u8      tv = SWTRACE_VERSION; /* protocol version */
+	const u8      ps = sizeof(void *); /* pointer size */
+	size_t        msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count);
+	char          *buffer;
+	size_t        pos = 0;
+	unsigned long flags;
+	unsigned int  i;
+
+	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+	KBASE_DEBUG_ASSERT(tp_desc);
+
+	/* Calculate the size of the timeline message. */
+	for (i = 0; i < tp_count; i++) {
+		msg_size += sizeof(tp_desc[i].id);
+		msg_size +=
+			strnlen(tp_desc[i].id_str,    STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].name,      STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_types, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+		msg_size +=
+			strnlen(tp_desc[i].arg_names, STRLEN_MAX) +
+			sizeof(char) + sizeof(u32);
+	}
+
+	KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE >= msg_size);
+
+	buffer = kbasep_tlstream_msgbuf_acquire(stream_type, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &tv, sizeof(tv));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ps, sizeof(ps));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tp_count, sizeof(tp_count));
+
+	for (i = 0; i < tp_count; i++) {
+		pos = kbasep_tlstream_write_bytes(
+				buffer, pos,
+				&tp_desc[i].id, sizeof(tp_desc[i].id));
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].id_str, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].name, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_types, msg_size - pos);
+		pos = kbasep_tlstream_write_string(
+				buffer, pos,
+				tp_desc[i].arg_names, msg_size - pos);
+	}
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(stream_type, flags);
+
+	/* We don't expect any more data to be read in this stream.
+	 * As header stream must be read before its associated body stream,
+	 * make this packet visible to the user straightaway. */
+	kbasep_tlstream_flush_stream(stream_type);
+}
+
+/*****************************************************************************/
+
+int kbase_tlstream_init(void)
+{
+	enum tl_stream_type i;
+
+	/* Prepare stream structures. */
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		tl_stream[i] = kmalloc(sizeof(**tl_stream), GFP_KERNEL);
+		if (!tl_stream[i])
+			break;
+		kbasep_timeline_stream_init(tl_stream[i], i);
+	}
+	if (TL_STREAM_TYPE_COUNT > i) {
+		for (; i > 0; i--) {
+			kbasep_timeline_stream_term(tl_stream[i - 1]);
+			kfree(tl_stream[i - 1]);
+		}
+		return -ENOMEM;
+	}
+
+	/* Initialize autoflush timer. */
+	atomic_set(&autoflush_timer_active, 0);
+	kbase_timer_setup(&autoflush_timer,
+			  kbasep_tlstream_autoflush_timer_callback);
+
+	return 0;
+}
+
+void kbase_tlstream_term(void)
+{
+	enum tl_stream_type i;
+
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+		kbasep_timeline_stream_term(tl_stream[i]);
+		kfree(tl_stream[i]);
+	}
+}
+
+static void kbase_create_timeline_objects(struct kbase_context *kctx)
+{
+	struct kbase_device             *kbdev = kctx->kbdev;
+	unsigned int                    lpu_id;
+	unsigned int                    as_nr;
+	struct kbasep_kctx_list_element *element;
+
+	/* Create LPU objects. */
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		u32 *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, lpu_id, *lpu);
+	}
+
+	/* Create Address Space objects. */
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(&kbdev->as[as_nr], as_nr);
+
+	/* Create GPU object and make it retain all LPUs and address spaces. */
+	KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(
+			kbdev,
+			kbdev->gpu_props.props.raw_props.gpu_id,
+			kbdev->gpu_props.num_cores);
+
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		void *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, kbdev);
+	}
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(
+				&kbdev->as[as_nr],
+				kbdev);
+
+	/* Create object for each known context. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry(element, &kbdev->kctx_list, link) {
+		KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(
+				element->kctx,
+				element->kctx->id,
+				(u32)(element->kctx->tgid));
+	}
+	/* Before releasing the lock, reset body stream buffers.
+	 * This will prevent context creation message to be directed to both
+	 * summary and body stream.
+	 */
+	kbase_tlstream_reset_body_streams();
+	mutex_unlock(&kbdev->kctx_list_lock);
+	/* Static object are placed into summary packet that needs to be
+	 * transmitted first. Flush all streams to make it available to
+	 * user space.
+	 */
+	kbase_tlstream_flush_streams();
+}
+
+int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags)
+{
+	int ret;
+	u32 tlstream_enabled = TLSTREAM_ENABLED | flags;
+
+	if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, tlstream_enabled)) {
+		int rcode;
+
+		ret = anon_inode_getfd(
+				"[mali_tlstream]",
+				&kbasep_tlstream_fops,
+				kctx,
+				O_RDONLY | O_CLOEXEC);
+		if (ret < 0) {
+			atomic_set(&kbase_tlstream_enabled, 0);
+			return ret;
+		}
+
+		/* Reset and initialize header streams. */
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_HEADER]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_OBJ_SUMMARY]);
+		kbasep_timeline_stream_reset(
+				tl_stream[TL_STREAM_TYPE_AUX_HEADER]);
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_OBJ_HEADER,
+				tp_desc_obj,
+				ARRAY_SIZE(tp_desc_obj));
+		kbasep_tlstream_timeline_header(
+				TL_STREAM_TYPE_AUX_HEADER,
+				tp_desc_aux,
+				ARRAY_SIZE(tp_desc_aux));
+
+		/* Start autoflush timer. */
+		atomic_set(&autoflush_timer_active, 1);
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+		CSTD_UNUSED(rcode);
+
+		/* If job dumping is enabled, readjust the software event's
+		 * timeout as the default value of 3 seconds is often
+		 * insufficient. */
+		if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) {
+			dev_info(kctx->kbdev->dev,
+					"Job dumping is enabled, readjusting the software event's timeout\n");
+			atomic_set(&kctx->kbdev->js_data.soft_job_timeout_ms,
+					1800000);
+		}
+
+		/* Summary stream was cleared during acquire.
+		 * Create static timeline objects that will be
+		 * read by client.
+		 */
+		kbase_create_timeline_objects(kctx);
+
+	} else {
+		ret = -EBUSY;
+	}
+
+	return ret;
+}
+
+void kbase_tlstream_flush_streams(void)
+{
+	enum tl_stream_type stype;
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++)
+		kbasep_tlstream_flush_stream(stype);
+}
+
+void kbase_tlstream_reset_body_streams(void)
+{
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_OBJ]);
+	kbasep_timeline_stream_reset(
+			tl_stream[TL_STREAM_TYPE_AUX]);
+}
+
+#if MALI_UNIT_TEST
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated)
+{
+	KBASE_DEBUG_ASSERT(bytes_collected);
+	KBASE_DEBUG_ASSERT(bytes_generated);
+	*bytes_collected = atomic_read(&tlstream_bytes_collected);
+	*bytes_generated = atomic_read(&tlstream_bytes_generated);
+}
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
+{
+	const u32     msg_id = KBASE_TL_NEW_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(id) +
+		sizeof(core_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &id, sizeof(id));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_count, sizeof(core_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
+{
+	const u32     msg_id = KBASE_TL_NEW_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(nr) +
+		sizeof(fn);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &fn, sizeof(fn));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_LPU_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu)
+{
+	const u32     msg_id = KBASE_TL_LIFELINK_AS_GPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ_SUMMARY,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid)
+{
+	const u32     msg_id = KBASE_TL_NEW_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr)
+{
+	const u32     msg_id = KBASE_TL_NEW_ATOM;
+	const size_t  msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
+			sizeof(nr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &nr, sizeof(nr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_ctx(void *context)
+{
+	const u32     msg_id = KBASE_TL_DEL_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_del_atom(void *atom)
+{
+	const u32     msg_id = KBASE_TL_DEL_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_RET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_LPU;
+	const size_t  msg_s0 = sizeof(u32) + sizeof(char) +
+			strnlen(attrib_match_list, STRLEN_MAX);
+	const size_t  msg_size =
+			sizeof(msg_id) + sizeof(u64) +
+			sizeof(atom) + sizeof(lpu) + msg_s0;
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	pos = kbasep_tlstream_write_string(
+			buffer, pos, attrib_match_list, msg_s0);
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_CTX_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &context, sizeof(context));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_DEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_NDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_RDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_LPU;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_RET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx)
+{
+	const u32     msg_id = KBASE_TL_NRET_AS_CTX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &ctx, sizeof(ctx));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_RET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as)
+{
+	const u32     msg_id = KBASE_TL_NRET_ATOM_AS;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
+		sizeof(jd) + sizeof(affinity) + sizeof(config);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &jd, sizeof(jd));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &affinity, sizeof(affinity));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &config, sizeof(config));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(prio);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &prio, sizeof(prio));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_STATE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(state);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &state, sizeof(state));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_prioritized(void *atom)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITIZED;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_jit(
+		void *atom, u64 edit_addr, u64 new_addr)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_JIT;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom)
+		+ sizeof(edit_addr) + sizeof(new_addr);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &edit_addr, sizeof(edit_addr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &new_addr, sizeof(new_addr));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg)
+{
+	const u32     msg_id = KBASE_TL_ATTRIB_AS_CONFIG;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(as) +
+		sizeof(transtab) + sizeof(memattr) + sizeof(transcfg);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &as, sizeof(as));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transtab, sizeof(transtab));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &memattr, sizeof(memattr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &transcfg, sizeof(transcfg));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_lpu_softstop(void *lpu)
+{
+	const u32     msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(lpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &lpu, sizeof(lpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softjob_start(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_START;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softjob_end(void *atom)
+{
+	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_END;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom, sizeof(atom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu)
+{
+	const u32     msg_id = KBASE_JD_GPU_SOFT_RESET;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+/*****************************************************************************/
+
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
+{
+	const u32     msg_id = KBASE_AUX_PM_STATE;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(core_type) +
+		sizeof(state);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &core_type, sizeof(core_type));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change)
+{
+	const u32     msg_id = KBASE_AUX_PAGEFAULT;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count_change);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos,
+			&page_count_change, sizeof(page_count_change));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count)
+{
+	const u32     msg_id = KBASE_AUX_PAGESALLOC;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+		sizeof(page_count);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &page_count, sizeof(page_count));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_devfreq_target(u64 target_freq)
+{
+	const u32       msg_id = KBASE_AUX_DEVFREQ_TARGET;
+	const size_t    msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(target_freq);
+	unsigned long   flags;
+	char            *buffer;
+	size_t          pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &target_freq, sizeof(target_freq));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_protected_enter_start(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_ENTER_START;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+void __kbase_tlstream_aux_protected_enter_end(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_ENTER_END;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_protected_leave_start(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_LEAVE_START;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+void __kbase_tlstream_aux_protected_leave_end(void *gpu)
+{
+	const u32     msg_id = KBASE_AUX_PROTECTED_LEAVE_END;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &gpu, sizeof(gpu));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
new file mode 100644
index 0000000000000..bfa25d98264ad
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
@@ -0,0 +1,644 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#if !defined(_KBASE_TLSTREAM_H)
+#define _KBASE_TLSTREAM_H
+
+#include <mali_kbase.h>
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_init - initialize timeline infrastructure in kernel
+ * Return: zero on success, negative number on error
+ */
+int kbase_tlstream_init(void);
+
+/**
+ * kbase_tlstream_term - terminate timeline infrastructure in kernel
+ *
+ * Timeline need have to been previously enabled with kbase_tlstream_init().
+ */
+void kbase_tlstream_term(void);
+
+/**
+ * kbase_tlstream_acquire - acquire timeline stream file descriptor
+ * @kctx:  kernel common context
+ * @flags: timeline stream flags
+ *
+ * This descriptor is meant to be used by userspace timeline to gain access to
+ * kernel timeline stream. This stream is later broadcasted by user space to the
+ * timeline client.
+ * Only one entity can own the descriptor at any given time. Descriptor shall be
+ * closed if unused. If descriptor cannot be obtained (i.e. when it is already
+ * being used) return will be a negative value.
+ *
+ * Return: file descriptor on success, negative number on error
+ */
+int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags);
+
+/**
+ * kbase_tlstream_flush_streams - flush timeline streams.
+ *
+ * Function will flush pending data in all timeline streams.
+ */
+void kbase_tlstream_flush_streams(void);
+
+/**
+ * kbase_tlstream_reset_body_streams - reset timeline body streams.
+ *
+ * Function will discard pending data in all timeline body streams.
+ */
+void kbase_tlstream_reset_body_streams(void);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_tlstream_test - start timeline stream data generator
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay in milliseconds between trace points written by one
+ *             writer
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg:   if non-zero aux messages will be included
+ *
+ * This test starts a requested number of asynchronous writers in both IRQ and
+ * thread context. Each writer will generate required number of test
+ * tracepoints (tracepoints with embedded information about writer that
+ * should be verified by user space reader). Tracepoints will be emitted in
+ * all timeline body streams. If aux_msg is non-zero writer will also
+ * generate not testable tracepoints (tracepoints without information about
+ * writer). These tracepoints are used to check correctness of remaining
+ * timeline message generating functions. Writer will wait requested time
+ * between generating another set of messages. This call blocks until all
+ * writers finish.
+ */
+void kbase_tlstream_test(
+		unsigned int tpw_count,
+		unsigned int msg_delay,
+		unsigned int msg_count,
+		int          aux_msg);
+
+/**
+ * kbase_tlstream_stats - read timeline stream statistics
+ * @bytes_collected: will hold number of bytes read by the user
+ * @bytes_generated: will hold number of bytes generated by trace points
+ */
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+#define TL_ATOM_STATE_IDLE 0
+#define TL_ATOM_STATE_READY 1
+#define TL_ATOM_STATE_DONE 2
+#define TL_ATOM_STATE_POSTED 3
+
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr);
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu);
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr);
+void __kbase_tlstream_tl_del_ctx(void *context);
+void __kbase_tlstream_tl_del_atom(void *atom);
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list);
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config);
+void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio);
+void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state);
+void __kbase_tlstream_tl_attrib_atom_prioritized(void *atom);
+void __kbase_tlstream_tl_attrib_atom_jit(
+		void *atom, u64 edit_addr, u64 new_addr);
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg);
+void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom);
+void __kbase_tlstream_tl_event_lpu_softstop(void *lpu);
+void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom);
+void __kbase_tlstream_tl_event_atom_softjob_start(void *atom);
+void __kbase_tlstream_tl_event_atom_softjob_end(void *atom);
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu);
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count);
+void __kbase_tlstream_aux_devfreq_target(u64 target_freq);
+void __kbase_tlstream_aux_protected_enter_start(void *gpu);
+void __kbase_tlstream_aux_protected_enter_end(void *gpu);
+void __kbase_tlstream_aux_protected_leave_start(void *gpu);
+void __kbase_tlstream_aux_protected_leave_end(void *gpu);
+
+#define TLSTREAM_ENABLED (1 << 31)
+
+extern atomic_t kbase_tlstream_enabled;
+
+#define __TRACE_IF_ENABLED(trace_name, ...)                         \
+	do {                                                        \
+		int enabled = atomic_read(&kbase_tlstream_enabled); \
+		if (enabled & TLSTREAM_ENABLED)                     \
+			__kbase_tlstream_##trace_name(__VA_ARGS__); \
+	} while (0)
+
+#define __TRACE_IF_ENABLED_LATENCY(trace_name, ...)                     \
+	do {                                                            \
+		int enabled = atomic_read(&kbase_tlstream_enabled);     \
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
+			__kbase_tlstream_##trace_name(__VA_ARGS__);     \
+	} while (0)
+
+#define __TRACE_IF_ENABLED_JD(trace_name, ...)                      \
+	do {                                                        \
+		int enabled = atomic_read(&kbase_tlstream_enabled); \
+		if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED)    \
+			__kbase_tlstream_##trace_name(__VA_ARGS__); \
+	} while (0)
+
+/*****************************************************************************/
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX - create context object in timeline
+ *                                     summary
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU - create GPU object in timeline summary
+ * @gpu:        name of the GPU object
+ * @id:         id value of this GPU
+ * @core_count: number of cores this GPU hosts
+ *
+ * Function emits a timeline message informing about GPU creation. GPU is
+ * created with two attributes: id and core count.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(gpu, id, core_count) \
+	__TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU - create LPU object in timeline summary
+ * @lpu: name of the Logical Processing Unit object
+ * @nr:  sequential number assigned to this LPU
+ * @fn:  property describing this LPU's functional abilities
+ *
+ * Function emits a timeline message informing about LPU creation. LPU is
+ * created with two attributes: number linking this LPU with GPU's job slot
+ * and function bearing information about this LPU abilities.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, nr, fn) \
+	__TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU - lifelink LPU object to GPU
+ * @lpu: name of the Logical Processing Unit object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that LPU object shall be deleted
+ * along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_NEW_AS - create address space object in timeline summary
+ * @as: name of the address space object
+ * @nr: sequential number assigned to this address space
+ *
+ * Function emits a timeline message informing about address space creation.
+ * Address space is created with one attribute: number identifying this
+ * address space.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(as, nr) \
+	__TRACE_IF_ENABLED(tl_summary_new_as, as, nr)
+
+/**
+ * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU - lifelink address space object to GPU
+ * @as:  name of the address space object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that address space object
+ * shall be deleted along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(as, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_CTX - create context object in timeline
+ * @context: name of the context object
+ * @nr:      context number
+ * @tgid:    thread Group Id
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ */
+#define KBASE_TLSTREAM_TL_NEW_CTX(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_ATOM - create atom object in timeline
+ * @atom: name of the atom object
+ * @nr:   sequential number assigned to this atom
+ *
+ * Function emits a timeline message informing about atom creation. Atom is
+ * created with atom number (its attribute) that links it with actual work
+ * bucket id understood by hardware.
+ */
+#define KBASE_TLSTREAM_TL_NEW_ATOM(atom, nr) \
+	__TRACE_IF_ENABLED(tl_new_atom, atom, nr)
+
+/**
+ * KBASE_TLSTREAM_TL_DEL_CTX - destroy context object in timeline
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that context object ceased to
+ * exist.
+ */
+#define KBASE_TLSTREAM_TL_DEL_CTX(context) \
+	__TRACE_IF_ENABLED(tl_del_ctx, context)
+
+/**
+ * KBASE_TLSTREAM_TL_DEL_ATOM - destroy atom object in timeline
+ * @atom: name of the atom object
+ *
+ * Function emits a timeline message informing that atom object ceased to
+ * exist.
+ */
+#define KBASE_TLSTREAM_TL_DEL_ATOM(atom) \
+	__TRACE_IF_ENABLED(tl_del_atom, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_CTX_LPU - retain context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_CTX_LPU(context, lpu) \
+	__TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_CTX - retain atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by context and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_CTX(atom, context) \
+	__TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_LPU - retain atom by LPU
+ * @atom:              name of the atom object
+ * @lpu:               name of the Logical Processing Unit object
+ * @attrib_match_list: list containing match operator attributes
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_LPU(atom, lpu, attrib_match_list) \
+	__TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_CTX_LPU - release context by LPU
+ * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that context is being released
+ * by LPU object.
+ */
+#define KBASE_TLSTREAM_TL_NRET_CTX_LPU(context, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_CTX - release atom by context
+ * @atom:    name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by context.
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX(atom, context) \
+	__TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_LPU - release atom by LPU
+ * @atom: name of the atom object
+ * @lpu:  name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by LPU.
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU(atom, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_AS_CTX - lifelink address space object to context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being held by the context object.
+ */
+#define KBASE_TLSTREAM_TL_RET_AS_CTX(as, ctx) \
+	__TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_AS_CTX - release address space by context
+ * @as:  name of the address space object
+ * @ctx: name of the context object
+ *
+ * Function emits a timeline message informing that address space object
+ * is being released by atom.
+ */
+#define KBASE_TLSTREAM_TL_NRET_AS_CTX(as, ctx) \
+	__TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_AS - retain atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by address space and must not be deleted unless it is released.
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_AS(atom, as) \
+	__TRACE_IF_ENABLED(tl_ret_atom_as, atom, as)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_AS - release atom by address space
+ * @atom: name of the atom object
+ * @as:   name of the address space object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by address space.
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_AS(atom, as) \
+	__TRACE_IF_ENABLED(tl_nret_atom_as, atom, as)
+
+/**
+ * KBASE_TLSTREAM_TL_DEP_ATOM_ATOM - parent atom depends on child atom
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depends on child atom
+ *
+ * Function emits a timeline message informing that parent atom waits for
+ * child atom object to be completed before start its execution.
+ */
+#define KBASE_TLSTREAM_TL_DEP_ATOM_ATOM(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2)
+
+/**
+ * KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM - dependency between atoms resolved
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_ndep_atom_atom, atom1, atom2)
+
+/**
+ * KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM - information about already resolved dependency between atoms
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_rdep_atom_atom, atom1, atom2)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - atom job slot attributes
+ * @atom:     name of the atom object
+ * @jd:       job descriptor address
+ * @affinity: job affinity
+ * @config:   job config
+ *
+ * Function emits a timeline message containing atom attributes.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(atom, jd, affinity, config) \
+	__TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY - atom priority
+ * @atom: name of the atom object
+ * @prio: atom priority
+ *
+ * Function emits a timeline message containing atom priority.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(atom, prio) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority, atom, prio)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE - atom state
+ * @atom:  name of the atom object
+ * @state: atom state
+ *
+ * Function emits a timeline message containing atom state.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, state) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_state, atom, state)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED - atom was prioritized
+ * @atom:  name of the atom object
+ *
+ * Function emits a timeline message signalling priority change
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(atom) \
+	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_prioritized, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT - jit happened on atom
+ * @atom:       atom identifier
+ * @edit_addr:  address edited by jit
+ * @new_addr:   address placed into the edited location
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(atom, edit_addr, new_addr) \
+	__TRACE_IF_ENABLED_JD(tl_attrib_atom_jit, atom, edit_addr, new_addr)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - address space attributes
+ * @as:       assigned address space
+ * @transtab: configuration of the TRANSTAB register
+ * @memattr:  configuration of the MEMATTR register
+ * @transcfg: configuration of the TRANSCFG register (or zero if not present)
+ *
+ * Function emits a timeline message containing address space attributes.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as, transtab, memattr, transcfg) \
+	__TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softstop_ex, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP
+ * @lpu:        name of the LPU object
+ */
+#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(lpu) \
+	__TRACE_IF_ENABLED(tl_event_lpu_softstop, lpu)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softstop_issue, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softjob_start, atom)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END
+ * @atom:       atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(atom) \
+	__TRACE_IF_ENABLED(tl_event_atom_softjob_end, atom)
+
+/**
+ * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - The GPU is being soft reset
+ * @gpu:        name of the GPU object
+ *
+ * This imperative tracepoint is specific to job dumping.
+ * Function emits a timeline message indicating GPU soft reset.
+ */
+#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET(gpu) \
+	__TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu)
+
+
+/**
+ * KBASE_TLSTREAM_AUX_PM_STATE - timeline message: power management state
+ * @core_type: core type (shader, tiler, l2 cache, l3 cache)
+ * @state:     64bits bitmask reporting power state of the cores (1-ON, 0-OFF)
+ */
+#define KBASE_TLSTREAM_AUX_PM_STATE(core_type, state) \
+	__TRACE_IF_ENABLED(aux_pm_state, core_type, state)
+
+/**
+ * KBASE_TLSTREAM_AUX_PAGEFAULT - timeline message: MMU page fault event
+ *                                resulting in new pages being mapped
+ * @ctx_nr:            kernel context number
+ * @page_count_change: number of pages to be added
+ */
+#define KBASE_TLSTREAM_AUX_PAGEFAULT(ctx_nr, page_count_change) \
+	__TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change)
+
+/**
+ * KBASE_TLSTREAM_AUX_PAGESALLOC - timeline message: total number of allocated
+ *                                 pages is changed
+ * @ctx_nr:     kernel context number
+ * @page_count: number of pages used by the context
+ */
+#define KBASE_TLSTREAM_AUX_PAGESALLOC(ctx_nr, page_count) \
+	__TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count)
+
+/**
+ * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET - timeline message: new target DVFS
+ *                                     frequency
+ * @target_freq: new target frequency
+ */
+#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(target_freq) \
+	__TRACE_IF_ENABLED(aux_devfreq_target, target_freq)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START - The GPU has started transitioning
+ *                                            to protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU is starting to
+ * transition to protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_enter_start, gpu)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END - The GPU has finished transitioning
+ *                                          to protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU has finished
+ * transitioning to protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_enter_end, gpu)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START - The GPU has started transitioning
+ *                                            to non-protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU is starting to
+ * transition to non-protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_leave_start, gpu)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END - The GPU has finished transitioning
+ *                                          to non-protected mode
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message indicating the GPU has finished
+ * transitioning to non-protected mode.
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(gpu) \
+	__TRACE_IF_ENABLED_LATENCY(aux_protected_leave_end, gpu)
+
+#endif /* _KBASE_TLSTREAM_H */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
new file mode 100644
index 0000000000000..d7364d5d8c1fe
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -0,0 +1,263 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * The purpose of this header file is just to contain a list of trace code idenitifers
+ *
+ * Each identifier is wrapped in a macro, so that its string form and enum form can be created
+ *
+ * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block.
+ *
+ * This allows automatic creation of an enum and a corresponding array of strings
+ *
+ * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE.
+ * After #including, the includer MUST #under KBASE_TRACE_CODE_MAKE_CODE.
+ *
+ * e.g.:
+ * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X
+ * typedef enum
+ * {
+ * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X )
+ * #include "mali_kbase_trace_defs.h"
+ * #undef  KBASE_TRACE_CODE_MAKE_CODE
+ * } kbase_trace_code;
+ *
+ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE
+ *
+ *
+ * The use of the macro here is:
+ * - KBASE_TRACE_CODE_MAKE_CODE( X )
+ *
+ * Which produces:
+ * - For an enum, KBASE_TRACE_CODE_X
+ * - For a string, "X"
+ *
+ *
+ * For example:
+ * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to:
+ *  - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum
+ *  - "JM_JOB_COMPLETE" for the string
+ * - To use it to trace an event, do:
+ *  - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val );
+ */
+
+#if 0 /* Dummy section to avoid breaking formatting */
+int dummy_array[] = {
+#endif
+
+/*
+ * Core events
+ */
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY),
+	/* no info_val, no gpu_addr, no atom */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ),
+	/* info_val == bits cleared */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR),
+	/* info_val == GPU_IRQ_STATUS register */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR),
+	/* GPU addr==dump address */
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE),
+	KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES),
+/*
+ * Job Slot management events
+ */
+	/* info_val==irq rawstat at start */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ),
+	/* info_val==jobs processed */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END),
+/* In the following:
+ *
+ * - ctx is set if a corresponding job found (NULL otherwise, e.g. some soft-stop cases)
+ * - uatom==kernel-side mapped uatom address (for correlation with user-side)
+ */
+	/* info_val==exit code; gpu_addr==chain gpuaddr */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE),
+	/* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT),
+	/* gpu_addr is as follows:
+	 * - If JS_STATUS active after soft-stop, val==gpu addr written to
+	 *   JS_HEAD on submit
+	 * - otherwise gpu_addr==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0),
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1),
+	/* gpu_addr==JS_TAIL read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD),
+/* gpu_addr is as follows:
+ * - If JS_STATUS active before soft-stop, val==JS_HEAD
+ * - otherwise gpu_addr==0
+ */
+	/* gpu_addr==JS_HEAD read */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED),
+	/* info_val == is_scheduled */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE),
+	/* info_val == nr jobs submitted */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP),
+	/* gpu_addr==JS_HEAD_NEXT last written */
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER),
+	KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER),
+/*
+ * Job dispatch events
+ */
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==0, info_val==0, uatom==0 */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER),
+/*
+ * Scheduler Core events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB),
+	/* gpu_addr==last value written/would be written to JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB),
+	/* gpu_addr==value to write into JS_HEAD */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED),
+	/* kctx is the one being evicted, info_val == kctx to put in  */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of rechecked affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED),
+	/* info_val == lower 32 bits of affinity */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE),
+	/* info_val == the ctx attribute now on ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX),
+	/* info_val == the ctx attribute now on runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL),
+	/* info_val == the ctx attribute now off ctx */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX),
+	/* info_val == the ctx attribute now off runpool */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL),
+/*
+ * Scheduler Policy events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX),
+	/* info_val == whether it was evicted */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ),
+	/* gpu_addr==JS_HEAD to write if the job were run */
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START),
+	KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END),
+/*
+ * Power Management Events
+ */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER),
+	/* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF),
+	/* info_val == policy number, or -1 for "Already changing" */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT),
+	/* info_val == policy number */
+	KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM),
+/* Unused code just to make it easier to not have a comma at the end.
+ * All other codes MUST come before this */
+	KBASE_TRACE_CODE_MAKE_CODE(DUMMY)
+
+#if 0 /* Dummy section to avoid breaking formatting */
+};
+#endif
+
+/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.c b/drivers/gpu/arm/midgard/mali_kbase_utility.c
new file mode 100644
index 0000000000000..3ea234aabeecd
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_utility.c
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry)
+{
+	struct list_head *pos = base->next;
+
+	while (pos != base) {
+		if (pos == entry)
+			return true;
+
+		pos = pos->next;
+	}
+	return false;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.h b/drivers/gpu/arm/midgard/mali_kbase_utility.h
new file mode 100644
index 0000000000000..f2e5a3381e13c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_utility.h
@@ -0,0 +1,66 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _KBASE_UTILITY_H
+#define _KBASE_UTILITY_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+/** Test whether the given list entry is a member of the given list.
+ *
+ * @param base      The head of the list to be tested
+ * @param entry     The list entry to be tested
+ *
+ * @return          true if entry is a member of base
+ *                  false otherwise
+ */
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry);
+
+
+static inline void kbase_timer_setup(struct timer_list *timer,
+				     void (*callback)(struct timer_list *timer))
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
+	setup_timer(timer, (void (*)(unsigned long)) callback,
+			(unsigned long) timer);
+#else
+	timer_setup(timer, callback, 0);
+#endif
+}
+
+#ifndef WRITE_ONCE
+	#ifdef ASSIGN_ONCE
+		#define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x)
+	#else
+		#define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val))
+	#endif
+#endif
+
+#ifndef READ_ONCE
+	#define READ_ONCE(x) ACCESS_ONCE(x)
+#endif
+
+#endif				/* _KBASE_UTILITY_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
new file mode 100644
index 0000000000000..df936cfdd4d84
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
@@ -0,0 +1,2361 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/hrtimer.h>
+#include <linux/jiffies.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/poll.h>
+#include <linux/preempt.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_hwcnt_reader.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tlstream.h>
+#ifdef CONFIG_MALI_NO_MALI
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif
+
+/*****************************************************************************/
+
+/* Hwcnt reader API version */
+#define HWCNT_READER_API        1
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC            1000000000ull /* ns */
+
+/* The time resolution of dumping service. */
+#define DUMPING_RESOLUTION      500000ull /* ns */
+
+/* The maximal supported number of dumping buffers. */
+#define MAX_BUFFER_COUNT        32
+
+/* Size and number of hw counters blocks. */
+#define NR_CNT_BLOCKS_PER_GROUP 8
+#define NR_CNT_PER_BLOCK        64
+#define NR_BYTES_PER_CNT        4
+#define NR_BYTES_PER_HDR        16
+#define PRFCNT_EN_MASK_OFFSET   0x8
+
+/*****************************************************************************/
+
+enum {
+	SHADER_HWCNT_BM,
+	TILER_HWCNT_BM,
+	MMU_L2_HWCNT_BM,
+	JM_HWCNT_BM
+};
+
+enum vinstr_state {
+	VINSTR_IDLE,
+	VINSTR_DUMPING,
+	VINSTR_SUSPENDING,
+	VINSTR_SUSPENDED,
+	VINSTR_RESUMING
+};
+
+/**
+ * struct kbase_vinstr_context - vinstr context per device
+ * @lock:              protects the entire vinstr context, but the list of
+ *                     vinstr clients can be updated outside the lock using
+ *                     @state_lock.
+ * @kbdev:             pointer to kbase device
+ * @kctx:              pointer to kbase context
+ * @vmap:              vinstr vmap for mapping hwcnt dump buffer
+ * @gpu_va:            GPU hwcnt dump buffer address
+ * @cpu_va:            the CPU side mapping of the hwcnt dump buffer
+ * @dump_size:         size of the dump buffer in bytes
+ * @bitmap:            current set of counters monitored, not always in sync
+ *                     with hardware
+ * @reprogram:         when true, reprogram hwcnt block with the new set of
+ *                     counters
+ * @state:             vinstr state
+ * @state_lock:        protects information about vinstr state and list of
+ *                     clients.
+ * @suspend_waitq:     notification queue to trigger state re-validation
+ * @suspend_cnt:       reference counter of vinstr's suspend state
+ * @suspend_work:      worker to execute on entering suspended state
+ * @resume_work:       worker to execute on leaving suspended state
+ * @nclients:          number of attached clients, pending or idle
+ * @nclients_suspended: number of attached but suspended clients
+ * @waiting_clients:   head of list of clients being periodically sampled
+ * @idle_clients:      head of list of clients being idle
+ * @suspended_clients: head of list of clients being suspended
+ * @thread:            periodic sampling thread
+ * @waitq:             notification queue of sampling thread
+ * @request_pending:   request for action for sampling thread
+ * @clients_present:   when true, we have at least one client
+ *                     Note: this variable is in sync. with nclients and is
+ *                     present to preserve simplicity. Protected by state_lock.
+ * @need_suspend:      when true, a suspend has been requested while a resume is
+ *                     in progress. Resume worker should queue a suspend.
+ * @need_resume:       when true, a resume has been requested while a suspend is
+ *                     in progress. Suspend worker should queue a resume.
+ * @forced_suspend:    when true, the suspend of vinstr needs to take place
+ *                     regardless of the kernel/user space clients attached
+ *                     to it. In particular, this flag is set when the suspend
+ *                     of vinstr is requested on entering protected mode or at
+ *                     the time of device suspend.
+ */
+struct kbase_vinstr_context {
+	struct mutex             lock;
+	struct kbase_device      *kbdev;
+	struct kbase_context     *kctx;
+
+	struct kbase_vmap_struct *vmap;
+	u64                      gpu_va;
+	void                     *cpu_va;
+	size_t                   dump_size;
+	u32                      bitmap[4];
+	bool                     reprogram;
+
+	enum vinstr_state        state;
+	struct spinlock          state_lock;
+	wait_queue_head_t        suspend_waitq;
+	unsigned int             suspend_cnt;
+	struct work_struct       suspend_work;
+	struct work_struct       resume_work;
+
+	u32                      nclients;
+	u32                      nclients_suspended;
+	struct list_head         waiting_clients;
+	struct list_head         idle_clients;
+	struct list_head         suspended_clients;
+
+	struct task_struct       *thread;
+	wait_queue_head_t        waitq;
+	atomic_t                 request_pending;
+
+	bool                     clients_present;
+
+	bool                     need_suspend;
+	bool                     need_resume;
+	bool                     forced_suspend;
+};
+
+/**
+ * struct kbase_vinstr_client - a vinstr client attached to a vinstr context
+ * @vinstr_ctx:    vinstr context client is attached to
+ * @list:          node used to attach this client to list in vinstr context
+ * @buffer_count:  number of buffers this client is using
+ * @event_mask:    events this client reacts to
+ * @dump_size:     size of one dump buffer in bytes
+ * @bitmap:        bitmap request for JM, TILER, SHADER and MMU counters
+ * @legacy_buffer: userspace hwcnt dump buffer (legacy interface)
+ * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface)
+ * @accum_buffer:  temporary accumulation buffer for preserving counters
+ * @dump_time:     next time this clients shall request hwcnt dump
+ * @dump_interval: interval between periodic hwcnt dumps
+ * @dump_buffers:  kernel hwcnt dump buffers allocated by this client
+ * @dump_buffers_meta: metadata of dump buffers
+ * @meta_idx:      index of metadata being accessed by userspace
+ * @read_idx:      index of buffer read by userspace
+ * @write_idx:     index of buffer being written by dumping service
+ * @waitq:         client's notification queue
+ * @pending:       when true, client has attached but hwcnt not yet updated
+ * @suspended:     when true, client is suspended
+ */
+struct kbase_vinstr_client {
+	struct kbase_vinstr_context        *vinstr_ctx;
+	struct list_head                   list;
+	unsigned int                       buffer_count;
+	u32                                event_mask;
+	size_t                             dump_size;
+	u32                                bitmap[4];
+	void __user                        *legacy_buffer;
+	void                               *kernel_buffer;
+	void                               *accum_buffer;
+	u64                                dump_time;
+	u32                                dump_interval;
+	char                               *dump_buffers;
+	struct kbase_hwcnt_reader_metadata *dump_buffers_meta;
+	atomic_t                           meta_idx;
+	atomic_t                           read_idx;
+	atomic_t                           write_idx;
+	wait_queue_head_t                  waitq;
+	bool                               pending;
+	bool                               suspended;
+};
+
+/**
+ * struct kbasep_vinstr_wake_up_timer - vinstr service thread wake up timer
+ * @hrtimer:    high resolution timer
+ * @vinstr_ctx: vinstr context
+ */
+struct kbasep_vinstr_wake_up_timer {
+	struct hrtimer              hrtimer;
+	struct kbase_vinstr_context *vinstr_ctx;
+};
+
+/*****************************************************************************/
+
+static void kbase_vinstr_update_suspend(
+		struct kbase_vinstr_context *vinstr_ctx);
+
+static int kbasep_vinstr_service_task(void *data);
+
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(
+		struct file *filp,
+		poll_table  *wait);
+static long kbasep_vinstr_hwcnt_reader_ioctl(
+		struct file   *filp,
+		unsigned int  cmd,
+		unsigned long arg);
+static int kbasep_vinstr_hwcnt_reader_mmap(
+		struct file           *filp,
+		struct vm_area_struct *vma);
+static int kbasep_vinstr_hwcnt_reader_release(
+		struct inode *inode,
+		struct file  *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations vinstr_client_fops = {
+	.poll           = kbasep_vinstr_hwcnt_reader_poll,
+	.unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl,
+	.compat_ioctl   = kbasep_vinstr_hwcnt_reader_ioctl,
+	.mmap           = kbasep_vinstr_hwcnt_reader_mmap,
+	.release        = kbasep_vinstr_hwcnt_reader_release,
+};
+
+/*****************************************************************************/
+
+static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbase_ioctl_hwcnt_enable enable;
+	int err;
+
+	enable.dump_buffer = vinstr_ctx->gpu_va;
+	enable.jm_bm       = vinstr_ctx->bitmap[JM_HWCNT_BM];
+	enable.tiler_bm    = vinstr_ctx->bitmap[TILER_HWCNT_BM];
+	enable.shader_bm   = vinstr_ctx->bitmap[SHADER_HWCNT_BM];
+	enable.mmu_l2_bm   = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM];
+
+	/* Mark the context as active so the GPU is kept turned on */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread. */
+	kbase_pm_context_active(kbdev);
+
+	/* Schedule the context in */
+	kbasep_js_schedule_privileged_ctx(kbdev, kctx);
+	err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
+	if (err) {
+		/* Release the context. This had its own Power Manager Active
+		 * reference */
+		kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+		/* Also release our Power Manager Active reference */
+		kbase_pm_context_idle(kbdev);
+	}
+
+	return err;
+}
+
+static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+
+	err = kbase_instr_hwcnt_disable_internal(kctx);
+	if (err) {
+		dev_warn(kbdev->dev, "Failed to disable HW counters (ctx:%p)",
+				kctx);
+		return;
+	}
+
+	/* Release the context. This had its own Power Manager Active reference. */
+	kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+	/* Also release our Power Manager Active reference. */
+	kbase_pm_context_idle(kbdev);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx);
+}
+
+static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
+{
+	disable_hwcnt(vinstr_ctx);
+	return enable_hwcnt(vinstr_ctx);
+}
+
+static void hwcnt_bitmap_set(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     = src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  = src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] = src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] = src[MMU_L2_HWCNT_BM];
+}
+
+static void hwcnt_bitmap_union(u32 dst[4], u32 src[4])
+{
+	dst[JM_HWCNT_BM]     |= src[JM_HWCNT_BM];
+	dst[TILER_HWCNT_BM]  |= src[TILER_HWCNT_BM];
+	dst[SHADER_HWCNT_BM] |= src[SHADER_HWCNT_BM];
+	dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM];
+}
+
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev)
+{
+	size_t dump_size;
+
+#ifndef CONFIG_MALI_NO_MALI
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
+		u32 nr_cg;
+
+		nr_cg = kbdev->gpu_props.num_core_groups;
+		dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	} else
+#endif /* CONFIG_MALI_NO_MALI */
+	{
+		/* assume v5 for now */
+#ifdef CONFIG_MALI_NO_MALI
+		u32 nr_l2 = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
+		u64 core_mask =
+			(1ULL << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
+#else
+		base_gpu_props *props = &kbdev->gpu_props.props;
+		u32 nr_l2 = props->l2_props.num_l2_slices;
+		u64 core_mask = props->coherency_info.group[0].core_mask;
+#endif
+		u32 nr_blocks = fls64(core_mask);
+
+		/* JM and tiler counter blocks are always present */
+		dump_size = (2 + nr_l2 + nr_blocks) *
+				NR_CNT_PER_BLOCK *
+				NR_BYTES_PER_CNT;
+	}
+	return dump_size;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size);
+
+static size_t kbasep_vinstr_dump_size_ctx(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev);
+}
+
+static int kbasep_vinstr_map_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_va_region *reg;
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	u64 flags, nr_pages;
+
+	flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR |
+		BASE_MEM_PERMANENT_KERNEL_MAPPING | BASE_MEM_CACHED_CPU;
+	if (kctx->kbdev->mmu_mode->flags &
+			KBASE_MMU_MODE_HAS_NON_CACHEABLE)
+		flags |= BASE_MEM_UNCACHED_GPU;
+	vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	nr_pages = PFN_UP(vinstr_ctx->dump_size);
+
+	reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+			&vinstr_ctx->gpu_va);
+	if (!reg)
+		return -ENOMEM;
+
+	vinstr_ctx->cpu_va = kbase_phy_alloc_mapping_get(kctx,
+			vinstr_ctx->gpu_va, &vinstr_ctx->vmap);
+
+	if (!vinstr_ctx->cpu_va) {
+		kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void kbasep_vinstr_unmap_kernel_dump_buffer(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+
+	kbase_phy_alloc_mapping_put(kctx, vinstr_ctx->vmap);
+	kbase_mem_free(kctx, vinstr_ctx->gpu_va);
+}
+
+/**
+ * kbasep_vinstr_create_kctx - create kernel context for vinstr
+ * @vinstr_ctx: vinstr context
+ * Return: zero on success
+ */
+static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element = NULL;
+	unsigned long flags;
+	bool enable_backend = false;
+	int err;
+
+	vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true);
+	if (!vinstr_ctx->kctx)
+		return -ENOMEM;
+
+	/* Map the master kernel dump buffer.  The HW dumps the counters
+	 * into this memory region. */
+	err = kbasep_vinstr_map_kernel_dump_buffer(vinstr_ctx);
+	if (err)
+		goto failed_map;
+
+	/* Add kernel context to list of contexts associated with device. */
+	element = kzalloc(sizeof(*element), GFP_KERNEL);
+	if (element) {
+		element->kctx = vinstr_ctx->kctx;
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_add(&element->link, &kbdev->kctx_list);
+
+		/* Inform timeline client about new context.
+		 * Do this while holding the lock to avoid tracepoint
+		 * being created in both body and summary stream. */
+		KBASE_TLSTREAM_TL_NEW_CTX(
+				vinstr_ctx->kctx,
+				vinstr_ctx->kctx->id,
+				(u32)(vinstr_ctx->kctx->tgid));
+
+		mutex_unlock(&kbdev->kctx_list_lock);
+	} else {
+		/* Don't treat this as a fail - just warn about it. */
+		dev_warn(kbdev->dev,
+				"couldn't add kctx to kctx_list\n");
+	}
+
+	/* Don't enable hardware counters if vinstr is suspended.
+	 * Note that vinstr resume code is run under vinstr context lock,
+	 * lower layer will be enabled as needed on resume. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE == vinstr_ctx->state)
+		enable_backend = true;
+	vinstr_ctx->clients_present = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	if (enable_backend)
+		err = enable_hwcnt(vinstr_ctx);
+	if (err)
+		goto failed_enable;
+
+	vinstr_ctx->thread = kthread_run(
+			kbasep_vinstr_service_task,
+			vinstr_ctx,
+			"mali_vinstr_service");
+	if (IS_ERR(vinstr_ctx->thread)) {
+		err = PTR_ERR(vinstr_ctx->thread);
+		goto failed_kthread;
+	}
+
+	return 0;
+
+failed_kthread:
+	disable_hwcnt(vinstr_ctx);
+failed_enable:
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->clients_present = false;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+	if (element) {
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_del(&element->link);
+		kfree(element);
+		mutex_unlock(&kbdev->kctx_list_lock);
+		KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx);
+	}
+failed_map:
+	kbase_destroy_context(vinstr_ctx->kctx);
+	vinstr_ctx->kctx = NULL;
+	return err;
+}
+
+/**
+ * kbasep_vinstr_destroy_kctx - destroy vinstr's kernel context
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_device             *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element;
+	struct kbasep_kctx_list_element *tmp;
+	bool                            found = false;
+	bool                            hwcnt_disabled = false;
+	unsigned long                   flags;
+
+	/* Release hw counters dumping resources. */
+	vinstr_ctx->thread = NULL;
+
+	/* Simplify state transitions by specifying that we have no clients. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->clients_present = false;
+	if ((VINSTR_SUSPENDED == vinstr_ctx->state) || (VINSTR_RESUMING == vinstr_ctx->state))
+		hwcnt_disabled = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	if (!hwcnt_disabled)
+		disable_hwcnt(vinstr_ctx);
+
+	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
+
+	/* Remove kernel context from the device's contexts list. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == vinstr_ctx->kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	if (!found)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	/* Destroy context. */
+	kbase_destroy_context(vinstr_ctx->kctx);
+
+	/* Inform timeline client about context destruction. */
+	KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx);
+
+	vinstr_ctx->kctx = NULL;
+}
+
+/**
+ * kbasep_vinstr_attach_client - Attach a client to the vinstr core
+ * @vinstr_ctx:    vinstr context
+ * @buffer_count:  requested number of dump buffers
+ * @bitmap:        bitmaps describing which counters should be enabled
+ * @argp:          pointer where notification descriptor shall be stored
+ * @kernel_buffer: pointer to kernel side buffer
+ *
+ * Return: vinstr opaque client handle or NULL on failure
+ */
+static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
+		struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count,
+		u32 bitmap[4], void *argp, void *kernel_buffer)
+{
+	struct task_struct         *thread = NULL;
+	struct kbase_vinstr_client *cli;
+	unsigned long flags;
+	bool clients_present = false;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	if (buffer_count > MAX_BUFFER_COUNT
+	    || (buffer_count & (buffer_count - 1)))
+		return NULL;
+
+	cli = kzalloc(sizeof(*cli), GFP_KERNEL);
+	if (!cli)
+		return NULL;
+
+	cli->vinstr_ctx   = vinstr_ctx;
+	cli->buffer_count = buffer_count;
+	cli->event_mask   =
+		(1 << BASE_HWCNT_READER_EVENT_MANUAL) |
+		(1 << BASE_HWCNT_READER_EVENT_PERIODIC);
+	cli->pending      = true;
+
+	hwcnt_bitmap_set(cli->bitmap, bitmap);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap);
+	vinstr_ctx->reprogram = true;
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	clients_present = (vinstr_ctx->nclients || vinstr_ctx->nclients_suspended);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	/* If this is the first client, create the vinstr kbase
+	 * context. This context is permanently resident until the
+	 * last client exits. */
+	if (!clients_present) {
+		hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap);
+		if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0)
+			goto error;
+
+		vinstr_ctx->reprogram = false;
+		cli->pending = false;
+	}
+
+	/* The GPU resets the counter block every time there is a request
+	 * to dump it. We need a per client kernel buffer for accumulating
+	 * the counters. */
+	cli->dump_size    = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
+	cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL);
+	if (!cli->accum_buffer)
+		goto error;
+
+	/* Prepare buffers. */
+	if (cli->buffer_count) {
+		int *fd = (int *)argp;
+		size_t tmp;
+
+		/* Allocate area for buffers metadata storage. */
+		tmp = sizeof(struct kbase_hwcnt_reader_metadata) *
+			cli->buffer_count;
+		cli->dump_buffers_meta = kmalloc(tmp, GFP_KERNEL);
+		if (!cli->dump_buffers_meta)
+			goto error;
+
+		/* Allocate required number of dumping buffers. */
+		cli->dump_buffers = (char *)__get_free_pages(
+				GFP_KERNEL | __GFP_ZERO,
+				get_order(cli->dump_size * cli->buffer_count));
+		if (!cli->dump_buffers)
+			goto error;
+
+		/* Create descriptor for user-kernel data exchange. */
+		*fd = anon_inode_getfd(
+				"[mali_vinstr_desc]",
+				&vinstr_client_fops,
+				cli,
+				O_RDONLY | O_CLOEXEC);
+		if (0 > *fd)
+			goto error;
+	} else if (kernel_buffer) {
+		cli->kernel_buffer = kernel_buffer;
+	} else {
+		cli->legacy_buffer = (void __user *)argp;
+	}
+
+	atomic_set(&cli->read_idx, 0);
+	atomic_set(&cli->meta_idx, 0);
+	atomic_set(&cli->write_idx, 0);
+	init_waitqueue_head(&cli->waitq);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->nclients++;
+	list_add(&cli->list, &vinstr_ctx->idle_clients);
+	kbase_vinstr_update_suspend(vinstr_ctx);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return cli;
+
+error:
+	kfree(cli->dump_buffers_meta);
+	if (cli->dump_buffers)
+		free_pages(
+				(unsigned long)cli->dump_buffers,
+				get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	if (!clients_present && vinstr_ctx->kctx) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+	kfree(cli);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+
+	return NULL;
+}
+
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	struct kbase_vinstr_client  *iter, *tmp;
+	struct task_struct          *thread = NULL;
+	u32 zerobitmap[4] = { 0 };
+	int cli_found = 0;
+	unsigned long flags;
+	bool clients_present;
+
+	KBASE_DEBUG_ASSERT(cli);
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+
+	list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) {
+		if (iter == cli) {
+			cli_found = 1;
+			break;
+		}
+	}
+	if (!cli_found) {
+		list_for_each_entry_safe(
+				iter, tmp, &vinstr_ctx->waiting_clients, list) {
+			if (iter == cli) {
+				cli_found = 1;
+				break;
+			}
+		}
+	}
+	if (!cli_found) {
+		list_for_each_entry_safe(
+				iter, tmp, &vinstr_ctx->suspended_clients, list) {
+			if (iter == cli) {
+				cli_found = 1;
+				break;
+			}
+		}
+	}
+	KBASE_DEBUG_ASSERT(cli_found);
+
+	if (cli_found) {
+		vinstr_ctx->reprogram = true;
+		list_del(&iter->list);
+	}
+
+	if (!cli->suspended)
+		vinstr_ctx->nclients--;
+	else
+		vinstr_ctx->nclients_suspended--;
+
+	kbase_vinstr_update_suspend(vinstr_ctx);
+
+	clients_present = (vinstr_ctx->nclients || vinstr_ctx->nclients_suspended);
+
+	/* Rebuild context bitmap now that the client has detached */
+	hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap);
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+	list_for_each_entry(iter, &vinstr_ctx->suspended_clients, list)
+		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
+
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	kfree(cli->dump_buffers_meta);
+	free_pages(
+			(unsigned long)cli->dump_buffers,
+			get_order(cli->dump_size * cli->buffer_count));
+	kfree(cli->accum_buffer);
+	kfree(cli);
+
+	if (!clients_present) {
+		thread = vinstr_ctx->thread;
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	}
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Thread must be stopped after lock is released. */
+	if (thread)
+		kthread_stop(thread);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client);
+
+/* Accumulate counters in the dump buffer */
+static void accum_dump_buffer(void *dst, void *src, size_t dump_size)
+{
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+	u32 *d = dst;
+	u32 *s = src;
+	size_t i, j;
+
+	for (i = 0; i < dump_size; i += block_size) {
+		/* skip over the header block */
+		d += NR_BYTES_PER_HDR / sizeof(u32);
+		s += NR_BYTES_PER_HDR / sizeof(u32);
+		for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) {
+			/* saturate result if addition would result in wraparound */
+			if (U32_MAX - *d < *s)
+				*d = U32_MAX;
+			else
+				*d += *s;
+			d++;
+			s++;
+		}
+	}
+}
+
+/* This is the Midgard v4 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v4(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	u32 nr_cg = vinstr_ctx->kctx->kbdev->gpu_props.num_core_groups;
+	size_t i, group_size, group;
+	enum {
+		SC0_BASE    = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC1_BASE    = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC2_BASE    = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		SC3_BASE    = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		TILER_BASE  = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+		JM_BASE     = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT
+	};
+
+	group_size = NR_CNT_BLOCKS_PER_GROUP *
+			NR_CNT_PER_BLOCK *
+			NR_BYTES_PER_CNT;
+	for (i = 0; i < nr_cg; i++) {
+		group = i * group_size;
+		/* copy shader core headers */
+		memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE],
+		       NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE],
+		      NR_BYTES_PER_HDR);
+		memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy tiler header */
+		memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy mmu header */
+		memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* copy job manager header */
+		memcpy(&dst[group + JM_BASE], &src[group + JM_BASE],
+		      NR_BYTES_PER_HDR);
+
+		/* patch the shader core enable mask */
+		mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[SHADER_HWCNT_BM];
+
+		/* patch the tiler core enable mask */
+		mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[TILER_HWCNT_BM];
+
+		/* patch the mmu core enable mask */
+		mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+
+		/* patch the job manager enable mask */
+		mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[JM_HWCNT_BM];
+	}
+}
+
+/* This is the Midgard v5 patch function.  It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v5(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client *cli)
+{
+	struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev;
+	u32 i, nr_l2;
+	u64 core_mask;
+	u32 *mask;
+	u8 *dst = cli->accum_buffer;
+	u8 *src = vinstr_ctx->cpu_va;
+	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+
+	/* copy and patch job manager header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[JM_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch tiler header */
+	memcpy(dst, src, NR_BYTES_PER_HDR);
+	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+	*mask &= cli->bitmap[TILER_HWCNT_BM];
+	dst += block_size;
+	src += block_size;
+
+	/* copy and patch MMU/L2C headers */
+	nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices;
+	for (i = 0; i < nr_l2; i++) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+		dst += block_size;
+		src += block_size;
+	}
+
+	/* copy and patch shader core headers */
+	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+	while (0ull != core_mask) {
+		memcpy(dst, src, NR_BYTES_PER_HDR);
+		if (0ull != (core_mask & 1ull)) {
+			/* if block is not reserved update header */
+			mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+			*mask &= cli->bitmap[SHADER_HWCNT_BM];
+		}
+		dst += block_size;
+		src += block_size;
+
+		core_mask >>= 1;
+	}
+}
+
+/**
+ * accum_clients - accumulate dumped hw counters for all known clients
+ * @vinstr_ctx: vinstr context
+ */
+static void accum_clients(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *iter;
+	int v4 = 0;
+
+#ifndef CONFIG_MALI_NO_MALI
+	v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4);
+#endif
+
+	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) {
+		/* Don't bother accumulating clients whose hwcnt requests
+		 * have not yet been honoured. */
+		if (iter->pending)
+			continue;
+		if (v4)
+			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
+		else
+			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
+		accum_dump_buffer(
+				iter->accum_buffer,
+				vinstr_ctx->cpu_va,
+				iter->dump_size);
+	}
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ *
+ * Return: timestamp value
+ */
+static u64 kbasep_vinstr_get_timestamp(void)
+{
+	struct timespec ts;
+
+	getrawmonotonic(&ts);
+	return (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+}
+
+/**
+ * kbasep_vinstr_add_dump_request - register client's dumping request
+ * @cli:             requesting client
+ * @waiting_clients: list of pending dumping requests
+ */
+static void kbasep_vinstr_add_dump_request(
+		struct kbase_vinstr_client *cli,
+		struct list_head *waiting_clients)
+{
+	struct kbase_vinstr_client *tmp;
+
+	if (list_empty(waiting_clients)) {
+		list_add(&cli->list, waiting_clients);
+		return;
+	}
+	list_for_each_entry(tmp, waiting_clients, list) {
+		if (tmp->dump_time > cli->dump_time) {
+			list_add_tail(&cli->list, &tmp->list);
+			return;
+		}
+	}
+	list_add_tail(&cli->list, waiting_clients);
+}
+
+/**
+ * kbasep_vinstr_collect_and_accumulate - collect hw counters via low level
+ *                                        dump and accumulate them for known
+ *                                        clients
+ * @vinstr_ctx: vinstr context
+ * @timestamp: pointer where collection timestamp will be recorded
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_collect_and_accumulate(
+		struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp)
+{
+	unsigned long flags;
+	int rcode;
+
+#ifdef CONFIG_MALI_NO_MALI
+	/* The dummy model needs the CPU mapping. */
+	gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va);
+#endif
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE != vinstr_ctx->state) {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+		return -EAGAIN;
+	} else {
+		vinstr_ctx->state = VINSTR_DUMPING;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	/* Request HW counters dump.
+	 * Disable preemption to make dump timestamp more accurate. */
+	preempt_disable();
+	*timestamp = kbasep_vinstr_get_timestamp();
+	rcode = kbase_instr_hwcnt_request_dump(vinstr_ctx->kctx);
+	preempt_enable();
+
+	if (!rcode)
+		rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx);
+	WARN_ON(rcode);
+
+	if (!rcode) {
+		/* Invalidate the kernel buffer before reading from it.
+		 * As the vinstr_ctx->lock is already held by the caller, the
+		 * unmap of kernel buffer cannot take place simultaneously.
+		 */
+		lockdep_assert_held(&vinstr_ctx->lock);
+		kbase_sync_mem_regions(vinstr_ctx->kctx, vinstr_ctx->vmap,
+				KBASE_SYNC_TO_CPU);
+	}
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDING:
+		schedule_work(&vinstr_ctx->suspend_work);
+		break;
+	case VINSTR_DUMPING:
+		vinstr_ctx->state = VINSTR_IDLE;
+		wake_up_all(&vinstr_ctx->suspend_waitq);
+		break;
+	default:
+		break;
+	}
+
+	/* Accumulate values of collected counters. */
+	if (!rcode)
+		accum_clients(vinstr_ctx);
+
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer - copy accumulated counters to empty kernel
+ *                                  buffer
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_fill_dump_buffer(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	unsigned int write_idx = atomic_read(&cli->write_idx);
+	unsigned int read_idx  = atomic_read(&cli->read_idx);
+
+	struct kbase_hwcnt_reader_metadata *meta;
+	void                               *buffer;
+
+	/* Check if there is a place to copy HWC block into. */
+	if (write_idx - read_idx == cli->buffer_count)
+		return -1;
+	write_idx %= cli->buffer_count;
+
+	/* Fill in dump buffer and its metadata. */
+	buffer = &cli->dump_buffers[write_idx * cli->dump_size];
+	meta   = &cli->dump_buffers_meta[write_idx];
+	meta->timestamp  = timestamp;
+	meta->event_id   = event_id;
+	meta->buffer_idx = write_idx;
+	memcpy(buffer, cli->accum_buffer, cli->dump_size);
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_legacy - copy accumulated counters to buffer
+ *                                         allocated in userspace
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of legacy ioctl interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_legacy(
+		struct kbase_vinstr_client *cli)
+{
+	void __user  *buffer = cli->legacy_buffer;
+	int          rcode;
+
+	/* Copy data to user buffer. */
+	rcode = copy_to_user(buffer, cli->accum_buffer, cli->dump_size);
+	if (rcode) {
+		pr_warn("error while copying buffer to user\n");
+		return -EFAULT;
+	}
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer
+ *                                         allocated in kernel space
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of the kernel client interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_kernel(
+		struct kbase_vinstr_client *cli)
+{
+	memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst
+ * @vinstr_ctx: vinstr context
+ */
+static void kbasep_vinstr_reprogram(
+		struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	bool suspended = false;
+
+	/* Don't enable hardware counters if vinstr is suspended. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE != vinstr_ctx->state)
+		suspended = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	if (suspended)
+		return;
+
+	/* Change to suspended state is done while holding vinstr context
+	 * lock. Below code will then no re-enable the instrumentation. */
+
+	if (vinstr_ctx->reprogram) {
+		struct kbase_vinstr_client *iter;
+
+		if (!reprogram_hwcnt(vinstr_ctx)) {
+			vinstr_ctx->reprogram = false;
+			spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->idle_clients,
+					list)
+				iter->pending = false;
+			list_for_each_entry(
+					iter,
+					&vinstr_ctx->waiting_clients,
+					list)
+				iter->pending = false;
+			spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+		}
+	}
+}
+
+/**
+ * kbasep_vinstr_update_client - copy accumulated counters to user readable
+ *                               buffer and notify the user
+ * @cli:       requesting client
+ * @timestamp: timestamp when counters were collected
+ * @event_id:  id of event that caused triggered counters collection
+ *
+ * Return: zero on success
+ */
+static int kbasep_vinstr_update_client(
+		struct kbase_vinstr_client *cli, u64 timestamp,
+		enum base_hwcnt_reader_event event_id)
+{
+	int rcode = 0;
+	unsigned long flags;
+
+	/* Copy collected counters to user readable buffer. */
+	if (cli->buffer_count)
+		rcode = kbasep_vinstr_fill_dump_buffer(
+				cli, timestamp, event_id);
+	else if (cli->kernel_buffer)
+		rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli);
+	else
+		rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli);
+
+	/* Prepare for next request. */
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+	spin_lock_irqsave(&cli->vinstr_ctx->state_lock, flags);
+	/* Check if client was put to suspend state while it was being updated */
+	if (cli->suspended)
+		rcode = -EINVAL;
+	spin_unlock_irqrestore(&cli->vinstr_ctx->state_lock, flags);
+
+	if (rcode)
+		goto exit;
+
+	/* Notify client. Make sure all changes to memory are visible. */
+	wmb();
+	atomic_inc(&cli->write_idx);
+	wake_up_interruptible(&cli->waitq);
+
+exit:
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_wake_up_callback - vinstr wake up timer wake up function
+ *
+ * @hrtimer: high resolution timer
+ *
+ * Return: High resolution timer restart enum.
+ */
+static enum hrtimer_restart kbasep_vinstr_wake_up_callback(
+		struct hrtimer *hrtimer)
+{
+	struct kbasep_vinstr_wake_up_timer *timer =
+		container_of(
+			hrtimer,
+			struct kbasep_vinstr_wake_up_timer,
+			hrtimer);
+
+	KBASE_DEBUG_ASSERT(timer);
+
+	atomic_set(&timer->vinstr_ctx->request_pending, 1);
+	wake_up_all(&timer->vinstr_ctx->waitq);
+
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * kbasep_vinstr_service_task - HWC dumping service thread
+ *
+ * @data: Pointer to vinstr context structure.
+ *
+ * Return: 0 on success; -ENOMEM if timer allocation fails
+ */
+static int kbasep_vinstr_service_task(void *data)
+{
+	struct kbase_vinstr_context        *vinstr_ctx = data;
+	struct kbasep_vinstr_wake_up_timer *timer;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	timer = kmalloc(sizeof(*timer), GFP_KERNEL);
+
+	if (!timer) {
+		dev_warn(vinstr_ctx->kbdev->dev, "Timer allocation failed!\n");
+		return -ENOMEM;
+	}
+
+	hrtimer_init(&timer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+	timer->hrtimer.function = kbasep_vinstr_wake_up_callback;
+	timer->vinstr_ctx       = vinstr_ctx;
+
+	while (!kthread_should_stop()) {
+		struct kbase_vinstr_client *cli = NULL;
+		struct kbase_vinstr_client *tmp;
+		int                        rcode;
+		unsigned long              flags;
+
+		u64              timestamp = kbasep_vinstr_get_timestamp();
+		u64              dump_time = 0;
+		struct list_head expired_requests;
+
+		/* Hold lock while performing operations on lists of clients. */
+		mutex_lock(&vinstr_ctx->lock);
+
+		/* Closing thread must not interact with client requests. */
+		if (current == vinstr_ctx->thread) {
+			atomic_set(&vinstr_ctx->request_pending, 0);
+
+			spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+			if (!list_empty(&vinstr_ctx->waiting_clients)) {
+				cli = list_first_entry(
+						&vinstr_ctx->waiting_clients,
+						struct kbase_vinstr_client,
+						list);
+				dump_time = cli->dump_time;
+			}
+			spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+		}
+
+		if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) {
+			mutex_unlock(&vinstr_ctx->lock);
+
+			/* Sleep until next dumping event or service request. */
+			if (cli) {
+				u64 diff = dump_time - timestamp;
+
+				hrtimer_start(
+						&timer->hrtimer,
+						ns_to_ktime(diff),
+						HRTIMER_MODE_REL);
+			}
+			wait_event(
+					vinstr_ctx->waitq,
+					atomic_read(
+						&vinstr_ctx->request_pending) ||
+					kthread_should_stop());
+			hrtimer_cancel(&timer->hrtimer);
+			continue;
+		}
+
+		rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx,
+				&timestamp);
+
+		INIT_LIST_HEAD(&expired_requests);
+
+		spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+		/* Find all expired requests. */
+		list_for_each_entry_safe(
+				cli,
+				tmp,
+				&vinstr_ctx->waiting_clients,
+				list) {
+			s64 tdiff =
+				(s64)(timestamp + DUMPING_RESOLUTION) -
+				(s64)cli->dump_time;
+			if (tdiff >= 0ll) {
+				list_del(&cli->list);
+				list_add(&cli->list, &expired_requests);
+			} else {
+				break;
+			}
+		}
+
+		/* Fill data for each request found. */
+		while (!list_empty(&expired_requests)) {
+			cli = list_first_entry(&expired_requests,
+					struct kbase_vinstr_client, list);
+
+			/* Ensure that legacy buffer will not be used from
+			 * this kthread context. */
+			BUG_ON(0 == cli->buffer_count);
+			/* Expect only periodically sampled clients. */
+			BUG_ON(0 == cli->dump_interval);
+
+			/* Release the spinlock, as filling the data in client's
+			 * userspace buffer could result in page faults. */
+			spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+			if (!rcode)
+				kbasep_vinstr_update_client(
+						cli,
+						timestamp,
+						BASE_HWCNT_READER_EVENT_PERIODIC);
+			spin_lock_irqsave(&cli->vinstr_ctx->state_lock, flags);
+
+			/* This client got suspended, move to the next one. */
+			if (cli->suspended)
+				continue;
+
+			/* Set new dumping time. Drop missed probing times. */
+			do {
+				cli->dump_time += cli->dump_interval;
+			} while (cli->dump_time < timestamp);
+
+			list_del(&cli->list);
+			kbasep_vinstr_add_dump_request(
+					cli,
+					&vinstr_ctx->waiting_clients);
+		}
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+		/* Reprogram counters set if required. */
+		kbasep_vinstr_reprogram(vinstr_ctx);
+
+		mutex_unlock(&vinstr_ctx->lock);
+	}
+
+	kfree(timer);
+
+	return 0;
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_hwcnt_reader_buffer_ready - check if client has ready buffers
+ * @cli: pointer to vinstr client structure
+ *
+ * Return: non-zero if client has at least one dumping buffer filled that was
+ *         not notified to user yet
+ */
+static int kbasep_vinstr_hwcnt_reader_buffer_ready(
+		struct kbase_vinstr_client *cli)
+{
+	KBASE_DEBUG_ASSERT(cli);
+	return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int meta_idx = atomic_read(&cli->meta_idx);
+	unsigned int idx = meta_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata *meta = &cli->dump_buffers_meta[idx];
+
+	/* Metadata sanity check. */
+	KBASE_DEBUG_ASSERT(idx == meta->buffer_idx);
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if there is any buffer available. */
+	if (atomic_read(&cli->write_idx) == meta_idx)
+		return -EAGAIN;
+
+	/* Check if previously taken buffer was put back. */
+	if (atomic_read(&cli->read_idx) != meta_idx)
+		return -EBUSY;
+
+	/* Copy next available buffer's metadata to user. */
+	if (copy_to_user(buffer, meta, size))
+		return -EFAULT;
+
+	atomic_inc(&cli->meta_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer - hwcnt reader's ioctl command
+ * @cli:    pointer to vinstr client structure
+ * @buffer: pointer to userspace buffer
+ * @size:   size of buffer
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+		struct kbase_vinstr_client *cli, void __user *buffer,
+		size_t size)
+{
+	unsigned int read_idx = atomic_read(&cli->read_idx);
+	unsigned int idx = read_idx % cli->buffer_count;
+
+	struct kbase_hwcnt_reader_metadata meta;
+
+	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
+		return -EINVAL;
+
+	/* Check if any buffer was taken. */
+	if (atomic_read(&cli->meta_idx) == read_idx)
+		return -EPERM;
+
+	/* Check if correct buffer is put back. */
+	if (copy_from_user(&meta, buffer, size))
+		return -EFAULT;
+	if (idx != meta.buffer_idx)
+		return -EINVAL;
+
+	atomic_inc(&cli->read_idx);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_set_interval - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @interval: periodic dumping interval (disable periodic dumping if zero)
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+		struct kbase_vinstr_client *cli, u32 interval)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+
+	if (cli->suspended) {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+		mutex_unlock(&vinstr_ctx->lock);
+		return -ENOMEM;
+	}
+
+	list_del(&cli->list);
+
+	cli->dump_interval = interval;
+
+	/* If interval is non-zero, enable periodic dumping for this client. */
+	if (cli->dump_interval) {
+		if (DUMPING_RESOLUTION > cli->dump_interval)
+			cli->dump_interval = DUMPING_RESOLUTION;
+		cli->dump_time =
+			kbasep_vinstr_get_timestamp() + cli->dump_interval;
+
+		kbasep_vinstr_add_dump_request(
+				cli, &vinstr_ctx->waiting_clients);
+
+		atomic_set(&vinstr_ctx->request_pending, 1);
+		wake_up_all(&vinstr_ctx->waitq);
+	} else {
+		list_add(&cli->list, &vinstr_ctx->idle_clients);
+	}
+
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_event_mask - return event mask for event id
+ * @event_id: id of event
+ * Return: event_mask or zero if event is not supported or maskable
+ */
+static u32 kbasep_vinstr_hwcnt_reader_event_mask(
+		enum base_hwcnt_reader_event event_id)
+{
+	u32 event_mask = 0;
+
+	switch (event_id) {
+	case BASE_HWCNT_READER_EVENT_PREJOB:
+	case BASE_HWCNT_READER_EVENT_POSTJOB:
+		/* These event are maskable. */
+		event_mask = (1 << event_id);
+		break;
+
+	case BASE_HWCNT_READER_EVENT_MANUAL:
+	case BASE_HWCNT_READER_EVENT_PERIODIC:
+		/* These event are non-maskable. */
+	default:
+		/* These event are not supported. */
+		break;
+	}
+
+	return event_mask;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_enable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to enable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask |= event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_disable_event - hwcnt reader's ioctl command
+ * @cli:      pointer to vinstr client structure
+ * @event_id: id of event to disable
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+	u32                         event_mask;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
+	if (!event_mask)
+		return -EINVAL;
+
+	mutex_lock(&vinstr_ctx->lock);
+	cli->event_mask &= ~event_mask;
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver - hwcnt reader's ioctl command
+ * @cli:   pointer to vinstr client structure
+ * @hwver: pointer to user buffer where hw version will be stored
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+		struct kbase_vinstr_client *cli, u32 __user *hwver)
+{
+#ifndef CONFIG_MALI_NO_MALI
+	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
+#endif
+
+	u32                         ver = 5;
+
+#ifndef CONFIG_MALI_NO_MALI
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4))
+		ver = 4;
+#endif
+
+	return put_user(ver, hwver);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl - hwcnt reader's ioctl
+ * @filp:   pointer to file structure
+ * @cmd:    user command
+ * @arg:    command's argument
+ *
+ * Return: zero on success
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+	long                       rcode = 0;
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (unlikely(KBASE_HWCNT_READER != _IOC_TYPE(cmd)))
+		return -EINVAL;
+
+	switch (cmd) {
+	case KBASE_HWCNT_READER_GET_API_VERSION:
+		rcode = put_user(HWCNT_READER_API, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_HWVER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+				cli, (u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER_SIZE:
+		KBASE_DEBUG_ASSERT(cli->vinstr_ctx);
+		rcode = put_user(
+				(u32)cli->vinstr_ctx->dump_size,
+				(u32 __user *)arg);
+		break;
+	case KBASE_HWCNT_READER_DUMP:
+		rcode = kbase_vinstr_hwc_dump(
+				cli, BASE_HWCNT_READER_EVENT_MANUAL);
+		break;
+	case KBASE_HWCNT_READER_CLEAR:
+		rcode = kbase_vinstr_hwc_clear(cli);
+		break;
+	case KBASE_HWCNT_READER_GET_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_PUT_BUFFER:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
+				cli, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	case KBASE_HWCNT_READER_SET_INTERVAL:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
+				cli, (u32)arg);
+		break;
+	case KBASE_HWCNT_READER_ENABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	case KBASE_HWCNT_READER_DISABLE_EVENT:
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+				cli, (enum base_hwcnt_reader_event)arg);
+		break;
+	default:
+		rcode = -EINVAL;
+		break;
+	}
+
+	return rcode;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_poll - hwcnt reader's poll
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp,
+		poll_table *wait)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	poll_wait(filp, &cli->waitq, wait);
+	if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_mmap - hwcnt reader's mmap
+ * @filp: pointer to file structure
+ * @vma:  pointer to vma structure
+ * Return: zero on success
+ */
+static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp,
+		struct vm_area_struct *vma)
+{
+	struct kbase_vinstr_client *cli;
+	unsigned long size, addr, pfn, offset;
+	unsigned long vm_size = vma->vm_end - vma->vm_start;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(vma);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	size = cli->buffer_count * cli->dump_size;
+
+	if (vma->vm_pgoff > (size >> PAGE_SHIFT))
+		return -EINVAL;
+
+	offset = vma->vm_pgoff << PAGE_SHIFT;
+	if (vm_size > size - offset)
+		return -EINVAL;
+
+	addr = __pa((unsigned long)cli->dump_buffers + offset);
+	pfn = addr >> PAGE_SHIFT;
+
+	return remap_pfn_range(
+			vma,
+			vma->vm_start,
+			pfn,
+			vm_size,
+			vma->vm_page_prot);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_release - hwcnt reader's release
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ * Return always return zero
+ */
+static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode,
+		struct file *filp)
+{
+	struct kbase_vinstr_client *cli;
+
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+
+	cli = filp->private_data;
+	KBASE_DEBUG_ASSERT(cli);
+
+	kbase_vinstr_detach_client(cli);
+	return 0;
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_vinstr_kick_scheduler - trigger scheduler cycle
+ * @kbdev: pointer to kbase device structure
+ */
+static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	down(&js_devdata->schedule_sem);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_backend_slot_update(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	up(&js_devdata->schedule_sem);
+}
+
+/**
+ * kbasep_vinstr_suspend_worker - worker suspending vinstr module
+ * @data: pointer to work structure
+ */
+static void kbasep_vinstr_suspend_worker(struct work_struct *data)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	unsigned long flags;
+
+	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
+			suspend_work);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->kctx)
+		disable_hwcnt(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->state = VINSTR_SUSPENDED;
+	wake_up_all(&vinstr_ctx->suspend_waitq);
+
+	if (vinstr_ctx->need_resume) {
+		vinstr_ctx->need_resume = false;
+		vinstr_ctx->state = VINSTR_RESUMING;
+		schedule_work(&vinstr_ctx->resume_work);
+
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+		mutex_unlock(&vinstr_ctx->lock);
+	} else {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+		mutex_unlock(&vinstr_ctx->lock);
+
+		/* Kick GPU scheduler to allow entering protected mode.
+		 * This must happen after vinstr was suspended.
+		 */
+		kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+	}
+}
+
+/**
+ * kbasep_vinstr_resume_worker - worker resuming vinstr module
+ * @data: pointer to work structure
+ */
+static void kbasep_vinstr_resume_worker(struct work_struct *data)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	unsigned long flags;
+
+	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
+			resume_work);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->kctx)
+		enable_hwcnt(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->state = VINSTR_IDLE;
+	wake_up_all(&vinstr_ctx->suspend_waitq);
+
+	if (vinstr_ctx->need_suspend) {
+		vinstr_ctx->need_suspend = false;
+		vinstr_ctx->state = VINSTR_SUSPENDING;
+		schedule_work(&vinstr_ctx->suspend_work);
+
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+		mutex_unlock(&vinstr_ctx->lock);
+	} else {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+		mutex_unlock(&vinstr_ctx->lock);
+
+		/* Kick GPU scheduler to allow entering protected mode.
+		 * Note that scheduler state machine might requested re-entry to
+		 * protected mode before vinstr was resumed.
+		 * This must happen after vinstr was release.
+		 */
+		kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+	}
+}
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+
+	vinstr_ctx = kzalloc(sizeof(*vinstr_ctx), GFP_KERNEL);
+	if (!vinstr_ctx)
+		return NULL;
+
+	INIT_LIST_HEAD(&vinstr_ctx->idle_clients);
+	INIT_LIST_HEAD(&vinstr_ctx->waiting_clients);
+	INIT_LIST_HEAD(&vinstr_ctx->suspended_clients);
+	mutex_init(&vinstr_ctx->lock);
+	spin_lock_init(&vinstr_ctx->state_lock);
+	vinstr_ctx->kbdev = kbdev;
+	vinstr_ctx->thread = NULL;
+	vinstr_ctx->state = VINSTR_IDLE;
+	vinstr_ctx->suspend_cnt = 0;
+	INIT_WORK(&vinstr_ctx->suspend_work, kbasep_vinstr_suspend_worker);
+	INIT_WORK(&vinstr_ctx->resume_work, kbasep_vinstr_resume_worker);
+	init_waitqueue_head(&vinstr_ctx->suspend_waitq);
+
+	atomic_set(&vinstr_ctx->request_pending, 0);
+	init_waitqueue_head(&vinstr_ctx->waitq);
+
+	return vinstr_ctx;
+}
+
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx)
+{
+	struct kbase_vinstr_client *cli;
+
+	/* Stop service thread first. */
+	if (vinstr_ctx->thread)
+		kthread_stop(vinstr_ctx->thread);
+
+	/* Wait for workers. */
+	flush_work(&vinstr_ctx->suspend_work);
+	flush_work(&vinstr_ctx->resume_work);
+
+	while (1) {
+		struct list_head *list = &vinstr_ctx->idle_clients;
+
+		if (list_empty(list)) {
+			list = &vinstr_ctx->waiting_clients;
+			if (list_empty(list)) {
+				list = &vinstr_ctx->suspended_clients;
+				if (list_empty(list))
+					break;
+			}
+		}
+
+		cli = list_first_entry(list, struct kbase_vinstr_client, list);
+		list_del(&cli->list);
+		if (!cli->suspended)
+			vinstr_ctx->nclients--;
+		else
+			vinstr_ctx->nclients_suspended--;
+		kfree(cli->accum_buffer);
+		kfree(cli);
+	}
+	KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients);
+	KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients_suspended);
+	if (vinstr_ctx->kctx)
+		kbasep_vinstr_destroy_kctx(vinstr_ctx);
+	kfree(vinstr_ctx);
+}
+
+int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_ioctl_hwcnt_reader_setup *setup)
+{
+	struct kbase_vinstr_client  *cli;
+	u32                         bitmap[4];
+	int                         fd;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(setup);
+	KBASE_DEBUG_ASSERT(setup->buffer_count);
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	cli = kbasep_vinstr_attach_client(
+			vinstr_ctx,
+			setup->buffer_count,
+			bitmap,
+			&fd,
+			NULL);
+
+	if (!cli)
+		return -ENOMEM;
+
+	kbase_vinstr_wait_for_ready(vinstr_ctx);
+	return fd;
+}
+
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_ioctl_hwcnt_enable *enable)
+{
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	KBASE_DEBUG_ASSERT(enable);
+	KBASE_DEBUG_ASSERT(cli);
+
+	if (enable->dump_buffer) {
+		u32 bitmap[4];
+
+		bitmap[SHADER_HWCNT_BM] = enable->shader_bm;
+		bitmap[TILER_HWCNT_BM]  = enable->tiler_bm;
+		bitmap[MMU_L2_HWCNT_BM] = enable->mmu_l2_bm;
+		bitmap[JM_HWCNT_BM]     = enable->jm_bm;
+
+		if (*cli)
+			return -EBUSY;
+
+		*cli = kbasep_vinstr_attach_client(
+				vinstr_ctx,
+				0,
+				bitmap,
+				(void *)(uintptr_t)enable->dump_buffer,
+				NULL);
+
+		if (!(*cli))
+			return -ENOMEM;
+
+		kbase_vinstr_wait_for_ready(vinstr_ctx);
+	} else {
+		if (!*cli)
+			return -EINVAL;
+
+		kbase_vinstr_detach_client(*cli);
+		*cli = NULL;
+	}
+
+	return 0;
+}
+
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_ioctl_hwcnt_reader_setup *setup,
+		void *kernel_buffer)
+{
+	struct kbase_vinstr_client *kernel_client;
+	u32 bitmap[4];
+
+	if (!vinstr_ctx || !setup || !kernel_buffer)
+		return NULL;
+
+	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+	kernel_client = kbasep_vinstr_attach_client(
+				vinstr_ctx,
+				0,
+				bitmap,
+				NULL,
+				kernel_buffer);
+
+	if (kernel_client)
+		kbase_vinstr_wait_for_ready(vinstr_ctx);
+
+	return kernel_client;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup);
+
+int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	int                         rcode = 0;
+	struct kbase_vinstr_context *vinstr_ctx;
+	u64                         timestamp;
+	u32                         event_mask;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	KBASE_DEBUG_ASSERT(event_id < BASE_HWCNT_READER_EVENT_COUNT);
+	event_mask = 1 << event_id;
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (event_mask & cli->event_mask) {
+		rcode = kbasep_vinstr_collect_and_accumulate(
+				vinstr_ctx,
+				&timestamp);
+		if (rcode)
+			goto exit;
+
+		rcode = kbasep_vinstr_update_client(cli, timestamp, event_id);
+		if (rcode)
+			goto exit;
+
+		kbasep_vinstr_reprogram(vinstr_ctx);
+	}
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump);
+
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	int                         rcode;
+	u64                         unused;
+
+	if (!cli)
+		return -EINVAL;
+
+	vinstr_ctx = cli->vinstr_ctx;
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused);
+	if (rcode)
+		goto exit;
+	rcode = kbase_instr_hwcnt_clear(vinstr_ctx->kctx);
+	if (rcode)
+		goto exit;
+	memset(cli->accum_buffer, 0, cli->dump_size);
+
+	kbasep_vinstr_reprogram(vinstr_ctx);
+
+exit:
+	mutex_unlock(&vinstr_ctx->lock);
+
+	return rcode;
+}
+
+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	int ret = -EAGAIN;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->forced_suspend = true;
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDED:
+		vinstr_ctx->suspend_cnt++;
+		/* overflow shall not happen */
+		BUG_ON(0 == vinstr_ctx->suspend_cnt);
+		ret = 0;
+		break;
+
+	case VINSTR_IDLE:
+		if (vinstr_ctx->clients_present) {
+			vinstr_ctx->state = VINSTR_SUSPENDING;
+			schedule_work(&vinstr_ctx->suspend_work);
+		} else {
+			vinstr_ctx->state = VINSTR_SUSPENDED;
+
+			vinstr_ctx->suspend_cnt++;
+			/* overflow shall not happen */
+			WARN_ON(0 == vinstr_ctx->suspend_cnt);
+			ret = 0;
+		}
+		break;
+
+	case VINSTR_DUMPING:
+		vinstr_ctx->state = VINSTR_SUSPENDING;
+		break;
+
+	case VINSTR_RESUMING:
+		vinstr_ctx->need_suspend = true;
+		break;
+
+	case VINSTR_SUSPENDING:
+		break;
+
+	default:
+		KBASE_DEBUG_ASSERT(0);
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	return ret;
+}
+
+static int kbase_vinstr_is_ready(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+	int ret = -EAGAIN;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDED:
+	case VINSTR_RESUMING:
+	case VINSTR_SUSPENDING:
+		break;
+
+	case VINSTR_IDLE:
+	case VINSTR_DUMPING:
+		ret = 0;
+		break;
+	default:
+		KBASE_DEBUG_ASSERT(0);
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	return ret;
+}
+
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	wait_event(vinstr_ctx->suspend_waitq,
+			(0 == kbase_vinstr_try_suspend(vinstr_ctx)));
+}
+
+void kbase_vinstr_wait_for_ready(struct kbase_vinstr_context *vinstr_ctx)
+{
+	wait_event(vinstr_ctx->suspend_waitq,
+			(0 == kbase_vinstr_is_ready(vinstr_ctx)));
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_wait_for_ready);
+
+/**
+ * kbase_vinstr_update_suspend - Update vinstr suspend/resume status depending
+ *                               on nclients
+ * @vinstr_ctx: vinstr context pointer
+ *
+ * This function should be called whenever vinstr_ctx->nclients changes. This
+ * may cause vinstr to be suspended or resumed, depending on the number of
+ * clients and whether IPA is suspended or not.
+ */
+static void kbase_vinstr_update_suspend(struct kbase_vinstr_context *vinstr_ctx)
+{
+	lockdep_assert_held(&vinstr_ctx->state_lock);
+
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDED:
+		if ((vinstr_ctx->nclients) && (0 == vinstr_ctx->suspend_cnt)) {
+			vinstr_ctx->state = VINSTR_RESUMING;
+			schedule_work(&vinstr_ctx->resume_work);
+		}
+		break;
+
+	case VINSTR_SUSPENDING:
+		if ((vinstr_ctx->nclients) && (!vinstr_ctx->forced_suspend))
+			vinstr_ctx->need_resume = true;
+		break;
+
+	case VINSTR_IDLE:
+		if (!vinstr_ctx->nclients) {
+			vinstr_ctx->state = VINSTR_SUSPENDING;
+			schedule_work(&vinstr_ctx->suspend_work);
+		}
+		break;
+
+	case VINSTR_DUMPING:
+		if (!vinstr_ctx->nclients)
+			vinstr_ctx->state = VINSTR_SUSPENDING;
+		break;
+
+	case VINSTR_RESUMING:
+		if (!vinstr_ctx->nclients)
+			vinstr_ctx->need_suspend = true;
+		break;
+	}
+}
+
+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	BUG_ON(VINSTR_SUSPENDING == vinstr_ctx->state);
+	if (VINSTR_SUSPENDED == vinstr_ctx->state) {
+		BUG_ON(0 == vinstr_ctx->suspend_cnt);
+		vinstr_ctx->suspend_cnt--;
+		if (0 == vinstr_ctx->suspend_cnt) {
+			vinstr_ctx->forced_suspend = false;
+			if (vinstr_ctx->clients_present) {
+				vinstr_ctx->state = VINSTR_RESUMING;
+				schedule_work(&vinstr_ctx->resume_work);
+			} else {
+				vinstr_ctx->state = VINSTR_IDLE;
+			}
+		}
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+}
+
+void kbase_vinstr_suspend_client(struct kbase_vinstr_client *client)
+{
+	struct kbase_vinstr_context *vinstr_ctx = client->vinstr_ctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+
+	if (!client->suspended) {
+		list_del(&client->list);
+		list_add(&client->list, &vinstr_ctx->suspended_clients);
+
+		vinstr_ctx->nclients--;
+		vinstr_ctx->nclients_suspended++;
+		kbase_vinstr_update_suspend(vinstr_ctx);
+
+		client->suspended = true;
+	}
+
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+}
+
+void kbase_vinstr_resume_client(struct kbase_vinstr_client *client)
+{
+	struct kbase_vinstr_context *vinstr_ctx = client->vinstr_ctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+
+	if (client->suspended) {
+		list_del(&client->list);
+		list_add(&client->list, &vinstr_ctx->idle_clients);
+
+		vinstr_ctx->nclients++;
+		vinstr_ctx->nclients_suspended--;
+		kbase_vinstr_update_suspend(vinstr_ctx);
+
+		client->suspended = false;
+	}
+
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
new file mode 100644
index 0000000000000..d32799f74084d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
@@ -0,0 +1,182 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_VINSTR_H_
+#define _KBASE_VINSTR_H_
+
+#include <mali_kbase_hwcnt_reader.h>
+#include <mali_kbase_ioctl.h>
+
+/*****************************************************************************/
+
+struct kbase_vinstr_context;
+struct kbase_vinstr_client;
+
+/*****************************************************************************/
+
+/**
+ * kbase_vinstr_init() - initialize the vinstr core
+ * @kbdev: kbase device
+ *
+ * Return: pointer to the vinstr context on success or NULL on failure
+ */
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_term() - terminate the vinstr core
+ * @vinstr_ctx: vinstr context
+ */
+void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_hwcnt_reader_setup - configure hw counters reader
+ * @vinstr_ctx: vinstr context
+ * @setup:      reader's configuration
+ *
+ * Return: file descriptor on success and a (negative) error code otherwise
+ */
+int kbase_vinstr_hwcnt_reader_setup(
+		struct kbase_vinstr_context        *vinstr_ctx,
+		struct kbase_ioctl_hwcnt_reader_setup *setup);
+
+/**
+ * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping
+ * @vinstr_ctx: vinstr context
+ * @cli:        pointer where to store pointer to new vinstr client structure
+ * @enable:      hwc configuration
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_legacy_hwc_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_vinstr_client  **cli,
+		struct kbase_ioctl_hwcnt_enable *enable);
+
+/**
+ * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side
+ *                                   client
+ * @vinstr_ctx:    vinstr context
+ * @setup:         reader's configuration
+ * @kernel_buffer: pointer to dump buffer
+ *
+ * setup->buffer_count is not used for kernel side clients.
+ *
+ * Return: pointer to client structure, or NULL on failure
+ */
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+		struct kbase_vinstr_context *vinstr_ctx,
+		struct kbase_ioctl_hwcnt_reader_setup *setup,
+		void *kernel_buffer);
+
+/**
+ * kbase_vinstr_hwc_dump - issue counter dump for vinstr client
+ * @cli:      pointer to vinstr client
+ * @event_id: id of event that triggered hwcnt dump
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_dump(
+		struct kbase_vinstr_client   *cli,
+		enum base_hwcnt_reader_event event_id);
+
+/**
+ * kbase_vinstr_hwc_clear - performs a reset of the hardware counters for
+ *                          a given kbase context
+ * @cli: pointer to vinstr client
+ *
+ * Return: zero on success
+ */
+int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli);
+
+/**
+ * kbase_vinstr_try_suspend - try suspending operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Return: 0 on success, or negative if state change is in progress
+ *
+ * Warning: This API call is non-generic. It is meant to be used only by
+ *          job scheduler state machine.
+ *
+ * Function initiates vinstr switch to suspended state. Once it was called
+ * vinstr enters suspending state. If function return non-zero value, it
+ * indicates that state switch is not complete and function must be called
+ * again. On state switch vinstr will trigger job scheduler state machine
+ * cycle.
+ */
+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_suspend - suspends operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Function initiates vinstr switch to suspended state. Then it blocks until
+ * operation is completed.
+ */
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_wait_for_ready - waits for the vinstr context to get ready
+ * @vinstr_ctx: vinstr context
+ *
+ * Function waits for the vinstr to become ready for dumping. It can be in the
+ * resuming state after the client was attached but the client currently expects
+ * that vinstr is ready for dumping immediately post attach.
+ */
+void kbase_vinstr_wait_for_ready(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_resume - resumes operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * Function can be called only if it was preceded by a successful call
+ * to kbase_vinstr_suspend.
+ */
+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx);
+
+/**
+ * kbase_vinstr_dump_size - Return required size of dump buffer
+ * @kbdev: device pointer
+ *
+ * Return : buffer size in bytes
+ */
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_detach_client - Detach a client from the vinstr core
+ * @cli: pointer to vinstr client
+ */
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli);
+
+/**
+ * kbase_vinstr_suspend_client - Suspend vinstr client
+ * @client: pointer to vinstr client
+ */
+void kbase_vinstr_suspend_client(struct kbase_vinstr_client *client);
+
+/**
+ * kbase_vinstr_resume_client - Resume vinstr client
+ * @client: pointer to vinstr client
+ */
+void kbase_vinstr_resume_client(struct kbase_vinstr_client *client);
+
+#endif /* _KBASE_VINSTR_H_ */
+
diff --git a/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
new file mode 100644
index 0000000000000..920562e40603e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
@@ -0,0 +1,205 @@
+/*
+ *
+ * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_KBASE_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(mali_slot_template,
+	TP_PROTO(int jobslot, unsigned int info_val),
+	TP_ARGS(jobslot, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, jobslot)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->jobslot = jobslot;
+		__entry->info_val = info_val;
+	),
+	TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val)
+);
+
+#define DEFINE_MALI_SLOT_EVENT(name) \
+DEFINE_EVENT(mali_slot_template, mali_##name, \
+	TP_PROTO(int jobslot, unsigned int info_val), \
+	TP_ARGS(jobslot, info_val))
+DEFINE_MALI_SLOT_EVENT(JM_SUBMIT);
+DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE);
+DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT);
+DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT);
+DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ);
+#undef DEFINE_MALI_SLOT_EVENT
+
+DECLARE_EVENT_CLASS(mali_refcount_template,
+	TP_PROTO(int refcount, unsigned int info_val),
+	TP_ARGS(refcount, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, refcount)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->refcount = refcount;
+		__entry->info_val = info_val;
+	),
+	TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val)
+);
+
+#define DEFINE_MALI_REFCOUNT_EVENT(name) \
+DEFINE_EVENT(mali_refcount_template, mali_##name, \
+	TP_PROTO(int refcount, unsigned int info_val), \
+	TP_ARGS(refcount, info_val))
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK);
+DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE);
+#undef DEFINE_MALI_REFCOUNT_EVENT
+
+DECLARE_EVENT_CLASS(mali_add_template,
+	TP_PROTO(int gpu_addr, unsigned int info_val),
+	TP_ARGS(gpu_addr, info_val),
+	TP_STRUCT__entry(
+		__field(unsigned int, gpu_addr)
+		__field(unsigned int, info_val)
+	),
+	TP_fast_assign(
+		__entry->gpu_addr = gpu_addr;
+		__entry->info_val = info_val;
+	),
+	TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val)
+);
+
+#define DEFINE_MALI_ADD_EVENT(name) \
+DEFINE_EVENT(mali_add_template, mali_##name, \
+	TP_PROTO(int gpu_addr, unsigned int info_val), \
+	TP_ARGS(gpu_addr, info_val))
+DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY);
+DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL);
+DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT);
+DEFINE_MALI_ADD_EVENT(JM_IRQ);
+DEFINE_MALI_ADD_EVENT(JM_IRQ_END);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE);
+DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET);
+DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE);
+DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP);
+DEFINE_MALI_ADD_EVENT(PM_PWRON);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_L2);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_TILER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_TILER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_GPU_ON);
+DEFINE_MALI_ADD_EVENT(PM_GPU_OFF);
+DEFINE_MALI_ADD_EVENT(PM_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM);
+DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
+#undef DEFINE_MALI_ADD_EVENT
+
+#endif /* _TRACE_MALI_KBASE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_linux_kbase_trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/arm/midgard/mali_linux_trace.h b/drivers/gpu/arm/midgard/mali_linux_trace.h
new file mode 100644
index 0000000000000..0741dfcab575b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_linux_trace.h
@@ -0,0 +1,194 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+#include <linux/tracepoint.h>
+
+#define MALI_JOB_SLOTS_EVENT_CHANGED
+
+/**
+ * mali_job_slots_event - called from mali_kbase_core_linux.c
+ * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro.
+ */
+TRACE_EVENT(mali_job_slots_event,
+	TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid,
+			unsigned char job_id),
+	TP_ARGS(event_id, tgid, pid, job_id),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned int, tgid)
+		__field(unsigned int, pid)
+		__field(unsigned char, job_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->tgid = tgid;
+		__entry->pid = pid;
+		__entry->job_id = job_id;
+	),
+	TP_printk("event=%u tgid=%u pid=%u job_id=%u",
+		__entry->event_id, __entry->tgid, __entry->pid, __entry->job_id)
+);
+
+/**
+ * mali_pm_status - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting either power status of the cores (1-ON, 0-OFF)
+ */
+TRACE_EVENT(mali_pm_status,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_on - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power up
+ */
+TRACE_EVENT(mali_pm_power_on,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_off - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power down
+ */
+TRACE_EVENT(mali_pm_power_off,
+	TP_PROTO(unsigned int event_id, unsigned long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(unsigned int, event_id)
+		__field(unsigned long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_page_fault_insert_pages - Called by page_fault_worker()
+ * it reports an MMU page fault resulting in new pages being mapped.
+ * @event_id: MMU address space number.
+ * @value: number of newly allocated pages
+ */
+TRACE_EVENT(mali_page_fault_insert_pages,
+	TP_PROTO(int event_id, unsigned long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+		__field(unsigned long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %d = %lu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space()
+ * it reports that a certain MMU address space is in use now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_in_use,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal()
+ * it reports that a certain MMU address space has been released now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_released,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages()
+ *                                 and by kbase_atomic_sub_pages()
+ * it reports that the total number of allocated pages is changed.
+ * @event_id: number of pages to be added or subtracted (according to the sign).
+ */
+TRACE_EVENT(mali_total_alloc_pages_change,
+	TP_PROTO(long long int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(long long int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%lld", __entry->event_id)
+);
+
+#endif				/*  _TRACE_MALI_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/arm/midgard/mali_malisw.h b/drivers/gpu/arm/midgard/mali_malisw.h
new file mode 100644
index 0000000000000..f17bd5edf7e1e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_malisw.h
@@ -0,0 +1,136 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Kernel-wide include for common macros and types.
+ */
+
+#ifndef _MALISW_H_
+#define _MALISW_H_
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define U8_MAX          ((u8)~0U)
+#define S8_MAX          ((s8)(U8_MAX>>1))
+#define S8_MIN          ((s8)(-S8_MAX - 1))
+#define U16_MAX         ((u16)~0U)
+#define S16_MAX         ((s16)(U16_MAX>>1))
+#define S16_MIN         ((s16)(-S16_MAX - 1))
+#define U32_MAX         ((u32)~0U)
+#define S32_MAX         ((s32)(U32_MAX>>1))
+#define S32_MIN         ((s32)(-S32_MAX - 1))
+#define U64_MAX         ((u64)~0ULL)
+#define S64_MAX         ((s64)(U64_MAX>>1))
+#define S64_MIN         ((s64)(-S64_MAX - 1))
+#endif /* LINUX_VERSION_CODE */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+#define SIZE_MAX        (~(size_t)0)
+#endif /* LINUX_VERSION_CODE */
+
+/**
+ * MIN - Return the lesser of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * Refer to MAX macro for more details
+ */
+#define MIN(x, y)	((x) < (y) ? (x) : (y))
+
+/**
+ * MAX -  Return the greater of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * If called on the same two arguments as MIN it is guaranteed to return
+ * the one that MIN didn't return. This is significant for types where not
+ * all values are comparable e.g. NaNs in floating-point types. But if you want
+ * to retrieve the min and max of two values, consider using a conditional swap
+ * instead.
+ */
+#define MAX(x, y)	((x) < (y) ? (y) : (x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for suppressing unused variable warnings. Where possible
+ * such variables should be removed; this macro is present for cases where we
+ * much support API backwards compatibility.
+ */
+#define CSTD_UNUSED(x)	((void)(x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for use where "no behavior" is desired. This is useful
+ * when compile time macros turn a function-like macro in to a no-op, but
+ * where having no statement is otherwise invalid.
+ */
+#define CSTD_NOP(...)	((void)#__VA_ARGS__)
+
+/**
+ * Function-like macro for converting a pointer in to a u64 for storing into
+ * an external data structure. This is commonly used when pairing a 32-bit
+ * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion
+ * is complex and a straight cast does not work reliably as pointers are
+ * often considered as signed.
+ */
+#define PTR_TO_U64(x)	((uint64_t)((uintptr_t)(x)))
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a single level macro.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR1( MY_MACRO )
+ * > "MY_MACRO"
+ * @endcode
+ */
+#define CSTD_STR1(x)	#x
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a macro's value. This should not be used
+ * if the macro is defined in a way which may have no value; use the
+ * alternative @c CSTD_STR2N macro should be used instead.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR2( MY_MACRO )
+ * > "32"
+ * @endcode
+ */
+#define CSTD_STR2(x)	CSTD_STR1(x)
+
+/**
+ * Specify an assertion value which is evaluated at compile time. Recommended
+ * usage is specification of a @c static @c INLINE function containing all of
+ * the assertions thus:
+ *
+ * @code
+ * static INLINE [module]_compile_time_assertions( void )
+ * {
+ *     COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) );
+ * }
+ * @endcode
+ *
+ * @note Use @c static not @c STATIC. We never want to turn off this @c static
+ * specification for testing purposes.
+ */
+#define CSTD_COMPILE_TIME_ASSERT(expr) \
+	do { switch (0) { case 0: case (expr):; } } while (false)
+
+#endif /* _MALISW_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_midg_coherency.h b/drivers/gpu/arm/midgard/mali_midg_coherency.h
new file mode 100644
index 0000000000000..29d5df38c92b1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_midg_coherency.h
@@ -0,0 +1,31 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _MIDG_COHERENCY_H_
+#define _MIDG_COHERENCY_H_
+
+#define COHERENCY_ACE_LITE 0
+#define COHERENCY_ACE      1
+#define COHERENCY_NONE     31
+#define COHERENCY_FEATURE_BIT(x) (1 << (x))
+
+#endif /* _MIDG_COHERENCY_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h
new file mode 100644
index 0000000000000..8d9f7b61b6d30
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -0,0 +1,643 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _MIDGARD_REGMAP_H_
+#define _MIDGARD_REGMAP_H_
+
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
+/*
+ * Begin Register Offsets
+ */
+
+#define GPU_CONTROL_BASE        0x0000
+#define GPU_CONTROL_REG(r)      (GPU_CONTROL_BASE + (r))
+#define GPU_ID                  0x000	/* (RO) GPU and revision identifier */
+#define L2_FEATURES             0x004	/* (RO) Level 2 cache features */
+#define CORE_FEATURES           0x008	/* (RO) Shader Core Features */
+#define TILER_FEATURES          0x00C	/* (RO) Tiler Features */
+#define MEM_FEATURES            0x010	/* (RO) Memory system features */
+#define MMU_FEATURES            0x014	/* (RO) MMU features */
+#define AS_PRESENT              0x018	/* (RO) Address space slots present */
+#define JS_PRESENT              0x01C	/* (RO) Job slots present */
+#define GPU_IRQ_RAWSTAT         0x020	/* (RW) */
+#define GPU_IRQ_CLEAR           0x024	/* (WO) */
+#define GPU_IRQ_MASK            0x028	/* (RW) */
+#define GPU_IRQ_STATUS          0x02C	/* (RO) */
+
+
+/* IRQ flags */
+#define GPU_FAULT               (1 << 0)	/* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS     (1 << 7)	/* More than one GPU Fault occurred. */
+#define RESET_COMPLETED         (1 << 8)	/* Set when a reset has completed. Intended to use with SOFT_RESET
+						   commands which may take time. */
+#define POWER_CHANGED_SINGLE    (1 << 9)	/* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL       (1 << 10)	/* Set when all cores have finished powering up or down
+						   and the power manager is idle. */
+
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16)	/* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED  (1 << 17)	/* Set when a cache clean operation has completed. */
+
+#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+			| POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
+#define GPU_COMMAND             0x030	/* (WO) */
+#define GPU_STATUS              0x034	/* (RO) */
+#define LATEST_FLUSH            0x038	/* (RO) */
+
+#define GROUPS_L2_COHERENT      (1 << 0)	/* Cores groups are l2 coherent */
+#define GPU_DBGEN               (1 << 8)	/* DBGEN wire status */
+
+#define GPU_FAULTSTATUS         0x03C	/* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO     0x040	/* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI     0x044	/* (RO) GPU exception fault address, high word */
+
+#define PWR_KEY                 0x050	/* (WO) Power manager key register */
+#define PWR_OVERRIDE0           0x054	/* (RW) Power manager override settings */
+#define PWR_OVERRIDE1           0x058	/* (RW) Power manager override settings */
+
+#define PRFCNT_BASE_LO          0x060	/* (RW) Performance counter memory region base address, low word */
+#define PRFCNT_BASE_HI          0x064	/* (RW) Performance counter memory region base address, high word */
+#define PRFCNT_CONFIG           0x068	/* (RW) Performance counter configuration */
+#define PRFCNT_JM_EN            0x06C	/* (RW) Performance counter enable flags for Job Manager */
+#define PRFCNT_SHADER_EN        0x070	/* (RW) Performance counter enable flags for shader cores */
+#define PRFCNT_TILER_EN         0x074	/* (RW) Performance counter enable flags for tiler */
+#define PRFCNT_MMU_L2_EN        0x07C	/* (RW) Performance counter enable flags for MMU/L2 cache */
+
+#define CYCLE_COUNT_LO          0x090	/* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI          0x094	/* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO            0x098	/* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI            0x09C	/* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS		0x0A0	/* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4	/* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8	/* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES         0x0AC	/* (RO) Thread features */
+#define THREAD_TLS_ALLOC        0x310   /* (RO) Number of threads per core that
+					 * TLS must be allocated for
+					 */
+
+#define TEXTURE_FEATURES_0      0x0B0	/* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1      0x0B4	/* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2      0x0B8	/* (RO) Support flags for indexed texture formats 64..95 */
+#define TEXTURE_FEATURES_3      0x0BC	/* (RO) Support flags for texture order */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define JS0_FEATURES            0x0C0	/* (RO) Features of job slot 0 */
+#define JS1_FEATURES            0x0C4	/* (RO) Features of job slot 1 */
+#define JS2_FEATURES            0x0C8	/* (RO) Features of job slot 2 */
+#define JS3_FEATURES            0x0CC	/* (RO) Features of job slot 3 */
+#define JS4_FEATURES            0x0D0	/* (RO) Features of job slot 4 */
+#define JS5_FEATURES            0x0D4	/* (RO) Features of job slot 5 */
+#define JS6_FEATURES            0x0D8	/* (RO) Features of job slot 6 */
+#define JS7_FEATURES            0x0DC	/* (RO) Features of job slot 7 */
+#define JS8_FEATURES            0x0E0	/* (RO) Features of job slot 8 */
+#define JS9_FEATURES            0x0E4	/* (RO) Features of job slot 9 */
+#define JS10_FEATURES           0x0E8	/* (RO) Features of job slot 10 */
+#define JS11_FEATURES           0x0EC	/* (RO) Features of job slot 11 */
+#define JS12_FEATURES           0x0F0	/* (RO) Features of job slot 12 */
+#define JS13_FEATURES           0x0F4	/* (RO) Features of job slot 13 */
+#define JS14_FEATURES           0x0F8	/* (RO) Features of job slot 14 */
+#define JS15_FEATURES           0x0FC	/* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n)      GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+
+#define SHADER_PRESENT_LO       0x100	/* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI       0x104	/* (RO) Shader core present bitmap, high word */
+
+#define TILER_PRESENT_LO        0x110	/* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI        0x114	/* (RO) Tiler core present bitmap, high word */
+
+#define L2_PRESENT_LO           0x120	/* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI           0x124	/* (RO) Level 2 cache present bitmap, high word */
+
+#define STACK_PRESENT_LO        0xE00   /* (RO) Core stack present bitmap, low word */
+#define STACK_PRESENT_HI        0xE04   /* (RO) Core stack present bitmap, high word */
+
+
+#define SHADER_READY_LO         0x140	/* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI         0x144	/* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO          0x150	/* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI          0x154	/* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO             0x160	/* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI             0x164	/* (RO) Level 2 cache ready bitmap, high word */
+
+#define STACK_READY_LO          0xE10   /* (RO) Core stack ready bitmap, low word */
+#define STACK_READY_HI          0xE14   /* (RO) Core stack ready bitmap, high word */
+
+
+#define SHADER_PWRON_LO         0x180	/* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI         0x184	/* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO          0x190	/* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI          0x194	/* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO             0x1A0	/* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI             0x1A4	/* (WO) Level 2 cache power on bitmap, high word */
+
+#define STACK_PWRON_LO          0xE20   /* (RO) Core stack power on bitmap, low word */
+#define STACK_PWRON_HI          0xE24   /* (RO) Core stack power on bitmap, high word */
+
+
+#define SHADER_PWROFF_LO        0x1C0	/* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI        0x1C4	/* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO         0x1D0	/* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI         0x1D4	/* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO            0x1E0	/* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI            0x1E4	/* (WO) Level 2 cache power off bitmap, high word */
+
+#define STACK_PWROFF_LO         0xE30   /* (RO) Core stack power off bitmap, low word */
+#define STACK_PWROFF_HI         0xE34   /* (RO) Core stack power off bitmap, high word */
+
+
+#define SHADER_PWRTRANS_LO      0x200	/* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI      0x204	/* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO       0x210	/* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI       0x214	/* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO          0x220	/* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI          0x224	/* (RO) Level 2 cache power transition bitmap, high word */
+
+#define STACK_PWRTRANS_LO       0xE40   /* (RO) Core stack power transition bitmap, low word */
+#define STACK_PWRTRANS_HI       0xE44   /* (RO) Core stack power transition bitmap, high word */
+
+
+#define SHADER_PWRACTIVE_LO     0x240	/* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI     0x244	/* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO      0x250	/* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI      0x254	/* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO         0x260	/* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI         0x264	/* (RO) Level 2 cache active bitmap, high word */
+
+#define COHERENCY_FEATURES      0x300	/* (RO) Coherency features present */
+#define COHERENCY_ENABLE        0x304	/* (RW) Coherency enable */
+
+#define JM_CONFIG               0xF00   /* (RW) Job Manager configuration register (Implementation specific register) */
+#define SHADER_CONFIG           0xF04	/* (RW) Shader core configuration settings (Implementation specific register) */
+#define TILER_CONFIG            0xF08   /* (RW) Tiler core configuration settings (Implementation specific register) */
+#define L2_MMU_CONFIG           0xF0C	/* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */
+
+#define JOB_CONTROL_BASE        0x1000
+
+#define JOB_CONTROL_REG(r)      (JOB_CONTROL_BASE + (r))
+
+#define JOB_IRQ_RAWSTAT         0x000	/* Raw interrupt status register */
+#define JOB_IRQ_CLEAR           0x004	/* Interrupt clear register */
+#define JOB_IRQ_MASK            0x008	/* Interrupt mask register */
+#define JOB_IRQ_STATUS          0x00C	/* Interrupt status register */
+#define JOB_IRQ_JS_STATE        0x010	/* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
+#define JOB_IRQ_THROTTLE        0x014	/* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt.  */
+
+/* JOB IRQ flags */
+#define JOB_IRQ_GLOBAL_IF       (1 << 18)   /* Global interface interrupt received */
+
+#define JOB_SLOT0               0x800	/* Configuration registers for job slot 0 */
+#define JOB_SLOT1               0x880	/* Configuration registers for job slot 1 */
+#define JOB_SLOT2               0x900	/* Configuration registers for job slot 2 */
+#define JOB_SLOT3               0x980	/* Configuration registers for job slot 3 */
+#define JOB_SLOT4               0xA00	/* Configuration registers for job slot 4 */
+#define JOB_SLOT5               0xA80	/* Configuration registers for job slot 5 */
+#define JOB_SLOT6               0xB00	/* Configuration registers for job slot 6 */
+#define JOB_SLOT7               0xB80	/* Configuration registers for job slot 7 */
+#define JOB_SLOT8               0xC00	/* Configuration registers for job slot 8 */
+#define JOB_SLOT9               0xC80	/* Configuration registers for job slot 9 */
+#define JOB_SLOT10              0xD00	/* Configuration registers for job slot 10 */
+#define JOB_SLOT11              0xD80	/* Configuration registers for job slot 11 */
+#define JOB_SLOT12              0xE00	/* Configuration registers for job slot 12 */
+#define JOB_SLOT13              0xE80	/* Configuration registers for job slot 13 */
+#define JOB_SLOT14              0xF00	/* Configuration registers for job slot 14 */
+#define JOB_SLOT15              0xF80	/* Configuration registers for job slot 15 */
+
+#define JOB_SLOT_REG(n, r)      (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
+
+#define JS_HEAD_LO             0x00	/* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI             0x04	/* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO             0x08	/* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI             0x0C	/* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO         0x10	/* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI         0x14	/* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG              0x18	/* (RO) Configuration settings for job slot n */
+#define JS_XAFFINITY           0x1C	/* (RO) Extended affinity mask for job
+					   slot n */
+
+#define JS_COMMAND             0x20	/* (WO) Command register for job slot n */
+#define JS_STATUS              0x24	/* (RO) Status register for job slot n */
+
+#define JS_HEAD_NEXT_LO        0x40	/* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI        0x44	/* (RW) Next job queue head pointer for job slot n, high word */
+
+#define JS_AFFINITY_NEXT_LO    0x50	/* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI    0x54	/* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT         0x58	/* (RW) Next configuration settings for job slot n */
+#define JS_XAFFINITY_NEXT      0x5C	/* (RW) Next extended affinity mask for
+					   job slot n */
+
+#define JS_COMMAND_NEXT        0x60	/* (RW) Next command register for job slot n */
+
+#define JS_FLUSH_ID_NEXT       0x70	/* (RW) Next job slot n cache flush ID */
+
+#define MEMORY_MANAGEMENT_BASE  0x2000
+#define MMU_REG(r)              (MEMORY_MANAGEMENT_BASE + (r))
+
+#define MMU_IRQ_RAWSTAT         0x000	/* (RW) Raw interrupt status register */
+#define MMU_IRQ_CLEAR           0x004	/* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK            0x008	/* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS          0x00C	/* (RO) Interrupt status register */
+
+#define MMU_AS0                 0x400	/* Configuration registers for address space 0 */
+#define MMU_AS1                 0x440	/* Configuration registers for address space 1 */
+#define MMU_AS2                 0x480	/* Configuration registers for address space 2 */
+#define MMU_AS3                 0x4C0	/* Configuration registers for address space 3 */
+#define MMU_AS4                 0x500	/* Configuration registers for address space 4 */
+#define MMU_AS5                 0x540	/* Configuration registers for address space 5 */
+#define MMU_AS6                 0x580	/* Configuration registers for address space 6 */
+#define MMU_AS7                 0x5C0	/* Configuration registers for address space 7 */
+#define MMU_AS8                 0x600	/* Configuration registers for address space 8 */
+#define MMU_AS9                 0x640	/* Configuration registers for address space 9 */
+#define MMU_AS10                0x680	/* Configuration registers for address space 10 */
+#define MMU_AS11                0x6C0	/* Configuration registers for address space 11 */
+#define MMU_AS12                0x700	/* Configuration registers for address space 12 */
+#define MMU_AS13                0x740	/* Configuration registers for address space 13 */
+#define MMU_AS14                0x780	/* Configuration registers for address space 14 */
+#define MMU_AS15                0x7C0	/* Configuration registers for address space 15 */
+
+#define MMU_AS_REG(n, r)        (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
+
+#define AS_TRANSTAB_LO         0x00	/* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI         0x04	/* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO          0x08	/* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI          0x0C	/* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO         0x10	/* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI         0x14	/* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND             0x18	/* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS         0x1C	/* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO     0x20	/* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI     0x24	/* (RO) Fault Address for address space n, high word */
+#define AS_STATUS              0x28	/* (RO) Status flags for address space n */
+
+
+/* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_LO         0x30
+/* (RW) Translation table configuration for address space n, high word */
+#define AS_TRANSCFG_HI         0x34
+/* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_LO       0x38
+/* (RO) Secondary fault address for address space n, high word */
+#define AS_FAULTEXTRA_HI       0x3C
+
+/* End Register Offsets */
+
+/*
+ * MMU_IRQ_RAWSTAT register values. Values are valid also for
+   MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS   16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers */
+#define MMU_PAGE_FAULT(n)      (1UL << (n))
+#define MMU_BUS_ERROR(n)       (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
+
+/*
+ * Begin LPAE MMU TRANSTAB register values
+ */
+#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK   0xfffff000
+#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED  (0u << 0)
+#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY  (1u << 1)
+#define AS_TRANSTAB_LPAE_ADRMODE_TABLE     (3u << 0)
+#define AS_TRANSTAB_LPAE_READ_INNER        (1u << 2)
+#define AS_TRANSTAB_LPAE_SHARE_OUTER       (1u << 4)
+
+#define AS_TRANSTAB_LPAE_ADRMODE_MASK      0x00000003
+
+/*
+ * Begin AARCH64 MMU TRANSTAB register values
+ */
+#define MMU_HW_OUTA_BITS 40
+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK                    (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT       (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT        (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT      (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG             (0x3<<3)
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT      (0x4<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK                  (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC                (0x0<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX                    (0x1<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ                  (0x2<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE                 (0x3<<8)
+
+/*
+ * Begin MMU TRANSCFG register values
+ */
+
+#define AS_TRANSCFG_ADRMODE_LEGACY      0
+#define AS_TRANSCFG_ADRMODE_UNMAPPED    1
+#define AS_TRANSCFG_ADRMODE_IDENTITY    2
+#define AS_TRANSCFG_ADRMODE_AARCH64_4K  6
+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
+
+#define AS_TRANSCFG_ADRMODE_MASK        0xF
+
+
+/*
+ * Begin TRANSCFG register values
+ */
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2 << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3 << 28)
+
+/*
+ * Begin Command Values
+ */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP         0x00	/* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START       0x01	/* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP   0x02	/* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP   0x03	/* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK        0x07    /* Mask of bits currently in use by the HW */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP         0x00	/* NOP Operation */
+#define AS_COMMAND_UPDATE      0x01	/* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK        0x02	/* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK      0x03	/* Issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH       0x04	/* Flush all L2 caches then issue a flush region command to all MMUs
+					   (deprecated - only for use with T60x) */
+#define AS_COMMAND_FLUSH_PT    0x04	/* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_MEM   0x05	/* Wait for memory accesses to complete, flush all the L1s cache then
+					   flush all L2 caches then issue a flush region command to all MMUs */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION        (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN            (1u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU                    (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG               (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION          JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN              (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE   (3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION       (1u << 14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK     (1u << 15)
+#define JS_CONFIG_THREAD_PRI(n)                ((n) << 16)
+
+/* JS_XAFFINITY register values */
+#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0)
+#define JS_XAFFINITY_TILER_ENABLE     (1u << 8)
+#define JS_XAFFINITY_CACHE_ENABLE     (1u << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are separated to avoid dependency of userspace and kernel code.
+ */
+
+/* Group of values representing the job status insead a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE   0x00
+#define JS_STATUS_INTERRUPTED         (JS_STATUS_NO_EXCEPTION_BASE + 0x02)	/* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED             (JS_STATUS_NO_EXCEPTION_BASE + 0x03)	/* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED          (JS_STATUS_NO_EXCEPTION_BASE + 0x04)	/* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE          0x40
+#define JS_STATUS_CONFIG_FAULT        (JS_STATUS_FAULT_BASE)	/* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT         (JS_STATUS_FAULT_BASE + 0x01)	/* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT          (JS_STATUS_FAULT_BASE + 0x02)	/* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT         (JS_STATUS_FAULT_BASE + 0x03)	/* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT      (JS_STATUS_FAULT_BASE + 0x04)	/* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT           (JS_STATUS_FAULT_BASE + 0x08)	/* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE  0x50
+#define JS_STATUS_INSTR_INVALID_PC        (JS_STATUS_INSTRUCTION_FAULT_BASE)	/* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01)	/* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02)	/* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03)	/* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT         (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04)	/* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05)	/* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06)	/* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with 0x57 code defined in spec. */
+#define JS_STATUS_DATA_INVALID_FAULT      (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08)	/* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT        (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09)	/* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A)	/* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE   0x60
+#define JS_STATUS_OUT_OF_MEMORY       (JS_STATUS_MEMORY_FAULT_BASE)	/* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN             0x7F	/* 0x7F means UNKNOWN */
+
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP                0x00	/* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET         0x01	/* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET         0x02	/* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR       0x03	/* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE      0x04	/* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START  0x05	/* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP   0x06	/* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES       0x07	/* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES   0x08	/* Clean and invalidate all caches */
+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09	/* Places the GPU in protected mode */
+
+/* End Command Values */
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE           (1 << 2)	/* Set if the performance counters are active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE   (1 << 7)	/* Set if protected mode is active */
+
+/* PRFCNT_CONFIG register values */
+#define PRFCNT_CONFIG_MODE_SHIFT      0 /* Counter mode position. */
+#define PRFCNT_CONFIG_AS_SHIFT        4 /* Address space bitmap position. */
+#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */
+
+#define PRFCNT_CONFIG_MODE_OFF    0	/* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_MANUAL 1	/* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
+#define PRFCNT_CONFIG_MODE_TILE   2	/* The performance counters are enabled, and are written out each time a tile finishes rendering. */
+
+/* AS<n>_MEMATTR values from MMU_MEMATTR_STAGE1: */
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_FORCE_TO_CACHE_ALL    0x8Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_WRITE_ALLOC           0x8Dull
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA       0x8Dull
+/* Set to inner non-cacheable, outer-non-cacheable
+ * Setting defined by the alloc bits is ignored, but set to a valid encoding:
+ * - no-alloc on read
+ * - no alloc on write
+ */
+#define AS_MEMATTR_AARCH64_NON_CACHEABLE  0x4Cull
+
+/* Use GPU implementation-defined  caching policy. */
+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    0x4Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_LPAE_WRITE_ALLOC           0x4Dull
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF        0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_WA              0x8Dull
+/* There is no LPAE support for non-cacheable, since the memory type is always
+ * write-back.
+ * Marking this setting as reserved for LPAE
+ */
+#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED
+
+/* Symbols for default MEMATTR to use
+ * Default is - HW implementation defined caching */
+#define AS_MEMATTR_INDEX_DEFAULT               0
+#define AS_MEMATTR_INDEX_DEFAULT_ACE           3
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL    1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC           2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF        3
+/* Outer coherent, write alloc inner */
+#define AS_MEMATTR_INDEX_OUTER_WA              4
+/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */
+#define AS_MEMATTR_INDEX_NON_CACHEABLE         5
+
+/* JS<n>_FEATURES register */
+
+#define JS_FEATURE_NULL_JOB              (1u << 1)
+#define JS_FEATURE_SET_VALUE_JOB         (1u << 2)
+#define JS_FEATURE_CACHE_FLUSH_JOB       (1u << 3)
+#define JS_FEATURE_COMPUTE_JOB           (1u << 4)
+#define JS_FEATURE_VERTEX_JOB            (1u << 5)
+#define JS_FEATURE_GEOMETRY_JOB          (1u << 6)
+#define JS_FEATURE_TILER_JOB             (1u << 7)
+#define JS_FEATURE_FUSED_JOB             (1u << 8)
+#define JS_FEATURE_FRAGMENT_JOB          (1u << 9)
+
+/* End JS<n>_FEATURES register */
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT       (23)
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY             (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT        (24)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS              (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT       (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER      (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF         (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT       (26)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES             (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT      (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER     (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF        (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS_SHIFT      (12)
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS            (0x7 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES_SHIFT     (15)
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES           (0x7 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED  0
+#define IMPLEMENTATION_SILICON      1
+#define IMPLEMENTATION_FPGA         2
+#define IMPLEMENTATION_MODEL        3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT     256
+#define THREAD_MWS_DEFAULT    256
+#define THREAD_MBS_DEFAULT    256
+#define THREAD_MR_DEFAULT     1024
+#define THREAD_MTQ_DEFAULT    4
+#define THREAD_MTGS_DEFAULT   10
+
+/* End THREAD_* registers */
+
+/* SHADER_CONFIG register */
+
+#define SC_ALT_COUNTERS             (1ul << 3)
+#define SC_OVERRIDE_FWD_PIXEL_KILL  (1ul << 4)
+#define SC_SDC_DISABLE_OQ_DISCARD   (1ul << 6)
+#define SC_LS_ALLOW_ATTR_TYPES      (1ul << 16)
+#define SC_LS_PAUSEBUFFER_DISABLE   (1ul << 16)
+#define SC_TLS_HASH_ENABLE          (1ul << 17)
+#define SC_LS_ATTR_CHECK_DISABLE    (1ul << 18)
+#define SC_ENABLE_TEXGRD_FLAGS      (1ul << 25)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+
+#define TC_CLOCK_GATE_OVERRIDE      (1ul << 0)
+
+/* End TILER_CONFIG register */
+
+/* JM_CONFIG register */
+
+#define JM_TIMESTAMP_OVERRIDE  (1ul << 0)
+#define JM_CLOCK_GATE_OVERRIDE (1ul << 1)
+#define JM_JOB_THROTTLE_ENABLE (1ul << 2)
+#define JM_JOB_THROTTLE_LIMIT_SHIFT (3)
+#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F)
+#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2)
+#define JM_IDVS_GROUP_SIZE_SHIFT (16)
+#define JM_MAX_IDVS_GROUP_SIZE (0x3F)
+/* End JM_CONFIG register */
+
+
+#endif /* _MIDGARD_REGMAP_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_uk.h b/drivers/gpu/arm/midgard/mali_uk.h
new file mode 100644
index 0000000000000..c81f404de26a1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_uk.h
@@ -0,0 +1,146 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_uk.h
+ * Types and definitions that are common across OSs for both the user
+ * and kernel side of the User-Kernel interface.
+ */
+
+#ifndef _UK_H_
+#define _UK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif				/* __cplusplus */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @defgroup uk_api User-Kernel Interface API
+ *
+ * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver.
+ *
+ * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent
+ * kernel-side API (UKK) via an OS-specific communication mechanism.
+ *
+ * This API is internal to the Midgard DDK and is not exposed to any applications.
+ *
+ * @{
+ */
+
+/**
+ * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The
+ * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this
+ * identifier to select a UKK client to the uku_open() function.
+ *
+ * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id
+ * enumeration and the uku_open() implemenation for the various OS ports need to be updated to
+ * provide a mapping of the identifier to the OS specific device name.
+ *
+ */
+enum uk_client_id {
+	/**
+	 * Value used to identify the Base driver UK client.
+	 */
+	UK_CLIENT_MALI_T600_BASE,
+
+	/** The number of uk clients supported. This must be the last member of the enum */
+	UK_CLIENT_COUNT
+};
+
+/**
+ * Each function callable through the UK interface has a unique number.
+ * Functions provided by UK clients start from number UK_FUNC_ID.
+ * Numbers below UK_FUNC_ID are used for internal UK functions.
+ */
+enum uk_func {
+	UKP_FUNC_ID_CHECK_VERSION,   /**< UKK Core internal function */
+	/**
+	 * Each UK client numbers the functions they provide starting from
+	 * number UK_FUNC_ID. This number is then eventually assigned to the
+	 * id field of the union uk_header structure when preparing to make a
+	 * UK call. See your UK client for a list of their function numbers.
+	 */
+	UK_FUNC_ID = 512
+};
+
+/**
+ * Arguments for a UK call are stored in a structure. This structure consists
+ * of a fixed size header and a payload. The header carries a 32-bit number
+ * identifying the UK function to be called (see uk_func). When the UKK client
+ * receives this header and executed the requested UK function, it will use
+ * the same header to store the result of the function in the form of a
+ * int return code. The size of this structure is such that the
+ * first member of the payload following the header can be accessed efficiently
+ * on a 32 and 64-bit kernel and the structure has the same size regardless
+ * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined
+ * accordingly in the OS specific mali_uk_os.h header file.
+ */
+union uk_header {
+	/**
+	 * 32-bit number identifying the UK function to be called.
+	 * Also see uk_func.
+	 */
+	u32 id;
+	/**
+	 * The int return code returned by the called UK function.
+	 * See the specification of the particular UK function you are
+	 * calling for the meaning of the error codes returned. All
+	 * UK functions return 0 on success.
+	 */
+	u32 ret;
+	/*
+	 * Used to ensure 64-bit alignment of this union. Do not remove.
+	 * This field is used for padding and does not need to be initialized.
+	 */
+	u64 sizer;
+};
+
+/**
+ * This structure carries a 16-bit major and minor number and is sent along with an internal UK call
+ * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side.
+ */
+struct uku_version_check_args {
+	union uk_header header;
+		  /**< UK call header */
+	u16 major;
+	   /**< This field carries the user-side major version on input and the kernel-side major version on output */
+	u16 minor;
+	   /**< This field carries the user-side minor version on input and the kernel-side minor version on output. */
+	u8 padding[4];
+};
+
+/** @} end group uk_api */
+
+/** @} *//* end group base_api */
+
+#ifdef __cplusplus
+}
+#endif				/* __cplusplus */
+#endif				/* _UK_H_ */
diff --git a/drivers/gpu/arm/midgard/platform/Kconfig b/drivers/gpu/arm/midgard/platform/Kconfig
new file mode 100644
index 0000000000000..ef9fb963ecf56
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/Kconfig
@@ -0,0 +1,30 @@
+#
+# (C) COPYRIGHT 2012-2013, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+
+# Add your platform specific Kconfig file here
+#
+# "drivers/gpu/arm/midgard/platform/xxx/Kconfig"
+#
+# Where xxx is the platform name is the name set in MALI_PLATFORM_NAME
+#
+
diff --git a/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
new file mode 100644
index 0000000000000..ce637fbb5ef7a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o
diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
new file mode 100644
index 0000000000000..ccefddf882fdc
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
@@ -0,0 +1,41 @@
+/*
+ *
+ * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase_config.h>
+
+static struct kbase_platform_config dummy_platform_config;
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &dummy_platform_config;
+}
+
+#ifndef CONFIG_OF
+int kbase_platform_register(void)
+{
+	return 0;
+}
+
+void kbase_platform_unregister(void)
+{
+}
+#endif
diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
new file mode 100644
index 0000000000000..5990313308fae
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
@@ -0,0 +1,46 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
+
+/**
+ * Autosuspend delay
+ *
+ * The delay time (in milliseconds) to be used for autosuspend
+ */
+#define AUTO_SUSPEND_DELAY (100)
diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
new file mode 100644
index 0000000000000..c5f3ad70f4385
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
@@ -0,0 +1,128 @@
+/*
+ *
+ * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <linux/pm_runtime.h>
+#include "mali_kbase_config_platform.h"
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	int ret = 1; /* Assume GPU has been powered off */
+	int error;
+
+	dev_dbg(kbdev->dev, "pm_callback_power_on %p\n",
+			(void *)kbdev->dev->pm_domain);
+
+	error = pm_runtime_get_sync(kbdev->dev);
+	if (error == 1) {
+		/*
+		 * Let core know that the chip has not been
+		 * powered off, so we can save on re-initialization.
+		 */
+		ret = 0;
+	}
+
+	dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error);
+
+	return ret;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_power_off\n");
+
+	pm_runtime_mark_last_busy(kbdev->dev);
+	pm_runtime_put_autosuspend(kbdev->dev);
+}
+
+#ifdef KBASE_PM_RUNTIME
+static int kbase_device_runtime_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+
+	pm_runtime_set_autosuspend_delay(kbdev->dev, AUTO_SUSPEND_DELAY);
+	pm_runtime_use_autosuspend(kbdev->dev);
+
+	pm_runtime_set_active(kbdev->dev);
+	pm_runtime_enable(kbdev->dev);
+
+	if (!pm_runtime_enabled(kbdev->dev)) {
+		dev_warn(kbdev->dev, "pm_runtime not enabled");
+		ret = -ENOSYS;
+	}
+
+	return ret;
+}
+
+static void kbase_device_runtime_disable(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n");
+	pm_runtime_disable(kbdev->dev);
+}
+#endif
+
+static int pm_callback_runtime_on(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_on\n");
+
+	return 0;
+}
+
+static void pm_callback_runtime_off(struct kbase_device *kbdev)
+{
+	dev_dbg(kbdev->dev, "pm_callback_runtime_off\n");
+}
+
+static void pm_callback_resume(struct kbase_device *kbdev)
+{
+	int ret = pm_callback_runtime_on(kbdev);
+
+	WARN_ON(ret);
+}
+
+static void pm_callback_suspend(struct kbase_device *kbdev)
+{
+	pm_callback_runtime_off(kbdev);
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback = pm_callback_suspend,
+	.power_resume_callback = pm_callback_resume,
+#ifdef KBASE_PM_RUNTIME
+	.power_runtime_init_callback = kbase_device_runtime_init,
+	.power_runtime_term_callback = kbase_device_runtime_disable,
+	.power_runtime_on_callback = pm_callback_runtime_on,
+	.power_runtime_off_callback = pm_callback_runtime_off,
+#else				/* KBASE_PM_RUNTIME */
+	.power_runtime_init_callback = NULL,
+	.power_runtime_term_callback = NULL,
+	.power_runtime_on_callback = NULL,
+	.power_runtime_off_callback = NULL,
+#endif				/* KBASE_PM_RUNTIME */
+};
+
+
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
new file mode 100644
index 0000000000000..6780e4c9433b7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \
+	mali_kbase_platform_fake.o
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
new file mode 100644
index 0000000000000..fac3cd52182ff
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
new file mode 100644
index 0000000000000..d165ce2628148
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_config_platform.h"
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+	.start = 0xFC010000,
+	.end = 0xFC010000 + (4096 * 4) - 1
+	}
+};
+#endif /* CONFIG_OF */
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
new file mode 100644
index 0000000000000..51b408efd48ad
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2013-2014, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \
+	mali_kbase_platform_fake.o
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
new file mode 100644
index 0000000000000..fac3cd52182ff
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
new file mode 100644
index 0000000000000..efca0a5b3493b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
@@ -0,0 +1,65 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 68,
+	.mmu_irq_number = 69,
+	.gpu_irq_number = 70,
+	.io_memory_region = {
+			     .start = 0x2f010000,
+			     .end = 0x2f010000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
new file mode 100644
index 0000000000000..e07709c9b1a52
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
@@ -0,0 +1,25 @@
+#
+# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_cpu_vexpress.o \
+	mali_kbase_platform_fake.o
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
new file mode 100644
index 0000000000000..fac3cd52182ff
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
new file mode 100644
index 0000000000000..b6714b95b7765
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+	.job_irq_number = 75,
+	.mmu_irq_number = 76,
+	.gpu_irq_number = 77,
+	.io_memory_region = {
+			     .start = 0x2F000000,
+			     .end = 0x2F000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	/* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+	return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+	.power_on_callback = pm_callback_power_on,
+	.power_off_callback = pm_callback_power_off,
+	.power_suspend_callback  = NULL,
+	.power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+	.io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+	return &versatile_platform_config;
+}
diff --git a/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h b/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
new file mode 100644
index 0000000000000..ef1ec708edef0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
@@ -0,0 +1,58 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_ukk_os.h
+ * Types and definitions that are common for Linux OSs for the kernel side of the
+ * User-Kernel interface.
+ */
+
+#ifndef _UKK_OS_H_ /* Linux version */
+#define _UKK_OS_H_
+
+#include <linux/fs.h>
+
+/**
+ * @addtogroup uk_api User-Kernel Interface API
+ * @{
+ */
+
+/**
+ * @addtogroup uk_api_kernel UKK (Kernel side)
+ * @{
+ */
+
+/**
+ * Internal OS specific data structure associated with each UKK session. Part
+ * of a ukk_session object.
+ */
+typedef struct ukkp_session {
+	int dummy;     /**< No internal OS specific data at this time */
+} ukkp_session;
+
+/** @} end group uk_api_kernel */
+
+/** @} end group uk_api */
+
+#endif /* _UKK_OS_H__ */
diff --git a/drivers/gpu/arm/midgard/protected_mode_switcher.h b/drivers/gpu/arm/midgard/protected_mode_switcher.h
new file mode 100644
index 0000000000000..8778d812aea00
--- /dev/null
+++ b/drivers/gpu/arm/midgard/protected_mode_switcher.h
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _PROTECTED_MODE_SWITCH_H_
+#define _PROTECTED_MODE_SWITCH_H_
+
+struct protected_mode_device;
+
+/**
+ * struct protected_mode_ops - Callbacks for protected mode switch operations
+ *
+ * @protected_mode_enable:  Callback to enable protected mode for device
+ * @protected_mode_disable: Callback to disable protected mode for device
+ */
+struct protected_mode_ops {
+	/**
+	 * protected_mode_enable() - Enable protected mode on device
+	 * @dev:	The struct device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_enable)(
+			struct protected_mode_device *protected_dev);
+
+	/**
+	 * protected_mode_disable() - Disable protected mode on device, and
+	 *                            reset device
+	 * @dev:	The struct device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_disable)(
+			struct protected_mode_device *protected_dev);
+};
+
+/**
+ * struct protected_mode_device - Device structure for protected mode devices
+ *
+ * @ops  - Callbacks associated with this device
+ * @data - Pointer to device private data
+ *
+ * This structure should be registered with the platform device using
+ * platform_set_drvdata().
+ */
+struct protected_mode_device {
+	struct protected_mode_ops ops;
+	void *data;
+};
+
+#endif /* _PROTECTED_MODE_SWITCH_H_ */
diff --git a/drivers/gpu/arm/midgard/sconscript b/drivers/gpu/arm/midgard/sconscript
new file mode 100644
index 0000000000000..01c75895611f1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/sconscript
@@ -0,0 +1,67 @@
+#
+# (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+import sys
+Import('env')
+
+SConscript( 'tests/sconscript' )
+
+mock_test = 0
+
+# Source files required for kbase.
+kbase_src = [
+	Glob('*.c'),
+	Glob('backend/*/*.c'),
+	Glob('internal/*/*.c'),
+	Glob('ipa/*.c'),
+	Glob('platform/%s/*.c' % env['platform_config']),
+	Glob('thirdparty/*.c'),
+]
+
+if env['platform_config']=='juno_soc':
+	kbase_src += [Glob('platform/devicetree/*.c')]
+else:
+	kbase_src += [Glob('platform/%s/*.c' % env['platform_config'])]
+
+if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1':
+	kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')]
+	mock_test = 1
+
+make_args = env.kernel_get_config_defines(ret_list = True) + [
+	'PLATFORM=%s' % env['platform'],
+	'MALI_KERNEL_TEST_API=%s' % env['debug'],
+	'MALI_UNIT_TEST=%s' % env['unit'],
+	'MALI_RELEASE_NAME=%s' % env['mali_release_name'],
+	'MALI_MOCK_TEST=%s' % mock_test,
+	'MALI_CUSTOMER_RELEASE=%s' % env['release'],
+	'MALI_USE_CSF=%s' % env['csf'],
+	'MALI_COVERAGE=%s' % env['coverage'],
+]
+
+kbase = env.BuildKernelModule('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src,
+                              make_args = make_args)
+
+if 'smc_protected_mode_switcher' in env:
+	env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/smc_protected_mode_switcher.ko')
+
+env.KernelObjTarget('kbase', kbase)
+
+env.AppendUnique(BASE=['cutils_linked_list'])
diff --git a/drivers/gpu/arm/midgard/tests/Kbuild b/drivers/gpu/arm/midgard/tests/Kbuild
new file mode 100644
index 0000000000000..df16a77a7f66e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/Kbuild
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+obj-$(CONFIG_MALI_KUTF) += kutf/
+obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test/
diff --git a/drivers/gpu/arm/midgard/tests/Kconfig b/drivers/gpu/arm/midgard/tests/Kconfig
new file mode 100644
index 0000000000000..fa91aea4ac5c5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/Kconfig
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+source "drivers/gpu/arm/midgard/tests/kutf/Kconfig"
+source "drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig"
diff --git a/drivers/gpu/arm/midgard/tests/Mconfig b/drivers/gpu/arm/midgard/tests/Mconfig
new file mode 100644
index 0000000000000..ddd76305dbb87
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/Mconfig
@@ -0,0 +1,27 @@
+#
+# (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA  02110-1301, USA.
+#
+
+config UNIT_TEST_KERNEL_MODULES
+	bool
+	default y if UNIT_TEST_CODE && BUILD_KERNEL_MODULES
+	default n
+
+config BUILD_IPA_TESTS
+	bool
+	default y if UNIT_TEST_KERNEL_MODULES && MALI_DEVFREQ
+	default n
+
+config BUILD_CSF_TESTS
+	bool
+	default y if UNIT_TEST_KERNEL_MODULES && GPU_HAS_CSF
+	default n
diff --git a/drivers/gpu/arm/midgard/tests/build.bp b/drivers/gpu/arm/midgard/tests/build.bp
new file mode 100644
index 0000000000000..3107062d6610f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/build.bp
@@ -0,0 +1,36 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2018 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+bob_defaults {
+    name: "kernel_test_module_defaults",
+    defaults: ["mali_kbase_shared_config_defaults"],
+    include_dirs: [
+        "kernel/drivers/gpu/arm",
+        "kernel/drivers/gpu/arm/midgard",
+        "kernel/drivers/gpu/arm/midgard/backend/gpu",
+        "kernel/drivers/gpu/arm/midgard/tests/include",
+    ],
+}
+
+subdirs = [
+    "kutf",
+    "mali_kutf_irq_test",
+]
+
+optional_subdirs = [
+    "kutf_test",
+    "kutf_test_runner",
+    "mali_kutf_ipa_test",
+    "mali_kutf_ipa_unit_test",
+    "mali_kutf_vinstr_test",
+    "mali_kutf_fw_test",
+]
diff --git a/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h
new file mode 100644
index 0000000000000..15e168c2385b7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h
@@ -0,0 +1,77 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_HELPERS_H_
+#define _KERNEL_UTF_HELPERS_H_
+
+/* kutf_helpers.h
+ * Test helper functions for the kernel UTF test infrastructure.
+ *
+ * These functions provide methods for enqueuing/dequeuing lines of text sent
+ * by user space. They are used to implement the transfer of "userdata" from
+ * user space to kernel.
+ */
+
+#include <kutf/kutf_suite.h>
+
+/**
+ * kutf_helper_input_dequeue() - Dequeue a line sent by user space
+ * @context:    KUTF context
+ * @str_size:   Pointer to an integer to receive the size of the string
+ *
+ * If no line is available then this function will wait (interruptibly) until
+ * a line is available.
+ *
+ * Return: The line dequeued, ERR_PTR(-EINTR) if interrupted or NULL on end
+ * of data.
+ */
+char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size);
+
+/**
+ * kutf_helper_input_enqueue() - Enqueue a line sent by user space
+ * @context:   KUTF context
+ * @str:       The user space address of the line
+ * @size:      The length in bytes of the string
+ *
+ * This function will use copy_from_user to copy the string out of user space.
+ * The string need not be NULL-terminated (@size should not include the NULL
+ * termination).
+ *
+ * As a special case @str==NULL and @size==0 is valid to mark the end of input,
+ * but callers should use kutf_helper_input_enqueue_end_of_data() instead.
+ *
+ * Return: 0 on success, -EFAULT if the line cannot be copied from user space,
+ * -ENOMEM if out of memory.
+ */
+int kutf_helper_input_enqueue(struct kutf_context *context,
+		const char __user *str, size_t size);
+
+/**
+ * kutf_helper_input_enqueue_end_of_data() - Signal no more data is to be sent
+ * @context:    KUTF context
+ *
+ * After this function has been called, kutf_helper_input_dequeue() will always
+ * return NULL.
+ */
+void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context);
+
+#endif	/* _KERNEL_UTF_HELPERS_H_ */
diff --git a/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h
new file mode 100644
index 0000000000000..3b1300e1ce6f7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h
@@ -0,0 +1,179 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_HELPERS_USER_H_
+#define _KERNEL_UTF_HELPERS_USER_H_
+
+/* kutf_helpers.h
+ * Test helper functions for the kernel UTF test infrastructure, whose
+ * implementation mirrors that of similar functions for kutf-userside
+ */
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_helpers.h>
+
+
+#define KUTF_HELPER_MAX_VAL_NAME_LEN 255
+
+enum kutf_helper_valtype {
+	KUTF_HELPER_VALTYPE_INVALID,
+	KUTF_HELPER_VALTYPE_U64,
+	KUTF_HELPER_VALTYPE_STR,
+
+	KUTF_HELPER_VALTYPE_COUNT /* Must be last */
+};
+
+struct kutf_helper_named_val {
+	enum kutf_helper_valtype type;
+	char *val_name;
+	union {
+		u64 val_u64;
+		char *val_str;
+	} u;
+};
+
+/* Extra error values for certain helpers when we want to distinguish between
+ * Linux's own error values too.
+ *
+ * These can only be used on certain functions returning an int type that are
+ * documented as returning one of these potential values, they cannot be used
+ * from functions return a ptr type, since we can't decode it with PTR_ERR
+ *
+ * No negative values are used - Linux error codes should be used instead, and
+ * indicate a problem in accessing the data file itself (are generally
+ * unrecoverable)
+ *
+ * Positive values indicate correct access but invalid parsing (can be
+ * recovered from assuming data in the future is correct) */
+enum kutf_helper_err {
+	/* No error - must be zero */
+	KUTF_HELPER_ERR_NONE = 0,
+	/* Named value parsing encountered an invalid name */
+	KUTF_HELPER_ERR_INVALID_NAME,
+	/* Named value parsing of string or u64 type encountered extra
+	 * characters after the value (after the last digit for a u64 type or
+	 * after the string end delimiter for string type) */
+	KUTF_HELPER_ERR_CHARS_AFTER_VAL,
+	/* Named value parsing of string type couldn't find the string end
+	 * delimiter.
+	 *
+	 * This cannot be encountered when the NAME="value" message exceeds the
+	 * textbuf's maximum line length, because such messages are not checked
+	 * for an end string delimiter */
+	KUTF_HELPER_ERR_NO_END_DELIMITER,
+	/* Named value didn't parse as any of the known types */
+	KUTF_HELPER_ERR_INVALID_VALUE,
+};
+
+
+/* Send named NAME=value pair, u64 value
+ *
+ * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long
+ *
+ * Any failure will be logged on the suite's current test fixture
+ *
+ * Returns 0 on success, non-zero on failure
+ */
+int kutf_helper_send_named_u64(struct kutf_context *context,
+		const char *val_name, u64 val);
+
+/* Get the maximum length of a string that can be represented as a particular
+ * NAME="value" pair without string-value truncation in the kernel's buffer
+ *
+ * Given val_name and the kernel buffer's size, this can be used to determine
+ * the maximum length of a string that can be sent as val_name="value" pair
+ * without having the string value truncated. Any string longer than this will
+ * be truncated at some point during communication to this size.
+ *
+ * It is assumed that val_name is a valid name for
+ * kutf_helper_send_named_str(), and no checking will be made to
+ * ensure this.
+ *
+ * Returns the maximum string length that can be represented, or a negative
+ * value if the NAME="value" encoding itself wouldn't fit in kern_buf_sz
+ */
+int kutf_helper_max_str_len_for_kern(const char *val_name, int kern_buf_sz);
+
+/* Send named NAME="str" pair
+ *
+ * no escaping allowed in str. Any of the following characters will terminate
+ * the string: '"' '\\' '\n'
+ *
+ * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long
+ *
+ * Any failure will be logged on the suite's current test fixture
+ *
+ * Returns 0 on success, non-zero on failure */
+int kutf_helper_send_named_str(struct kutf_context *context,
+		const char *val_name, const char *val_str);
+
+/* Receive named NAME=value pair
+ *
+ * This can receive u64 and string values - check named_val->type
+ *
+ * If you are not planning on dynamic handling of the named value's name and
+ * type, then kutf_helper_receive_check_val() is more useful as a
+ * convenience function.
+ *
+ * String members of named_val will come from memory allocated on the fixture's mempool
+ *
+ * Returns 0 on success. Negative value on failure to receive from the 'run'
+ * file, positive value indicates an enum kutf_helper_err value for correct
+ * reception of data but invalid parsing */
+int kutf_helper_receive_named_val(
+		struct kutf_context *context,
+		struct kutf_helper_named_val *named_val);
+
+/* Receive and validate NAME=value pair
+ *
+ * As with kutf_helper_receive_named_val, but validate that the
+ * name and type are as expected, as a convenience for a common pattern found
+ * in tests.
+ *
+ * NOTE: this only returns an error value if there was actually a problem
+ * receiving data.
+ *
+ * NOTE: If the underlying data was received correctly, but:
+ * - isn't of the expected name
+ * - isn't the expected type
+ * - isn't correctly parsed for the type
+ * then the following happens:
+ * - failure result is recorded
+ * - named_val->type will be KUTF_HELPER_VALTYPE_INVALID
+ * - named_val->u will contain some default value that should be relatively
+ *   harmless for the test, including being writable in the case of string
+ *   values
+ * - return value will be 0 to indicate success
+ *
+ * The rationale behind this is that we'd prefer to continue the rest of the
+ * test with failures propagated, rather than hitting a timeout */
+int kutf_helper_receive_check_val(
+		struct kutf_helper_named_val *named_val,
+		struct kutf_context *context,
+		const char *expect_val_name,
+		enum kutf_helper_valtype expect_val_type);
+
+/* Output a named value to kmsg */
+void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val);
+
+
+#endif	/* _KERNEL_UTF_HELPERS_USER_H_ */
diff --git a/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h
new file mode 100644
index 0000000000000..988559de1edf3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_MEM_H_
+#define _KERNEL_UTF_MEM_H_
+
+/* kutf_mem.h
+ * Functions for management of memory pools in the kernel.
+ *
+ * This module implements a memory pool allocator, allowing a test
+ * implementation to allocate linked allocations which can then be freed by a
+ * single free which releases all of the resources held by the entire pool.
+ *
+ * Note that it is not possible to free single resources within the pool once
+ * allocated.
+ */
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+/**
+ * struct kutf_mempool - the memory pool context management structure
+ * @head:	list head on which the allocations in this context are added to
+ * @lock:	mutex for concurrent allocation from multiple threads
+ *
+ */
+struct kutf_mempool {
+	struct list_head head;
+	struct mutex lock;
+};
+
+/**
+ * kutf_mempool_init() - Initialize a memory pool.
+ * @pool:	Memory pool structure to initialize, provided by the user
+ *
+ * Return:	zero on success
+ */
+int kutf_mempool_init(struct kutf_mempool *pool);
+
+/**
+ * kutf_mempool_alloc() - Allocate memory from a pool
+ * @pool:	Memory pool to allocate from
+ * @size:	Size of memory wanted in number of bytes
+ *
+ * Return:	Pointer to memory on success, NULL on failure.
+ */
+void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size);
+
+/**
+ * kutf_mempool_destroy() - Destroy a memory pool, freeing all memory within it.
+ * @pool:	The memory pool to free
+ */
+void kutf_mempool_destroy(struct kutf_mempool *pool);
+#endif	/* _KERNEL_UTF_MEM_H_ */
diff --git a/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h
new file mode 100644
index 0000000000000..49ebeb4ec546f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h
@@ -0,0 +1,181 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_RESULTSET_H_
+#define _KERNEL_UTF_RESULTSET_H_
+
+/* kutf_resultset.h
+ * Functions and structures for handling test results and result sets.
+ *
+ * This section of the kernel UTF contains structures and functions used for the
+ * management of Results and Result Sets.
+ */
+
+/**
+ * enum kutf_result_status - Status values for a single Test error.
+ * @KUTF_RESULT_BENCHMARK:	Result is a meta-result containing benchmark
+ *                              results.
+ * @KUTF_RESULT_SKIP:		The test was skipped.
+ * @KUTF_RESULT_UNKNOWN:	The test has an unknown result.
+ * @KUTF_RESULT_PASS:		The test result passed.
+ * @KUTF_RESULT_DEBUG:		The test result passed, but raised a debug
+ *                              message.
+ * @KUTF_RESULT_INFO:		The test result passed, but raised
+ *                              an informative message.
+ * @KUTF_RESULT_WARN:		The test result passed, but raised a warning
+ *                              message.
+ * @KUTF_RESULT_FAIL:		The test result failed with a non-fatal error.
+ * @KUTF_RESULT_FATAL:		The test result failed with a fatal error.
+ * @KUTF_RESULT_ABORT:		The test result failed due to a non-UTF
+ *                              assertion failure.
+ * @KUTF_RESULT_USERDATA:	User data is ready to be read,
+ *                              this is not seen outside the kernel
+ * @KUTF_RESULT_USERDATA_WAIT:	Waiting for user data to be sent,
+ *                              this is not seen outside the kernel
+ * @KUTF_RESULT_TEST_FINISHED:	The test has finished, no more results will
+ *                              be produced. This is not seen outside kutf
+ */
+enum kutf_result_status {
+	KUTF_RESULT_BENCHMARK = -3,
+	KUTF_RESULT_SKIP    = -2,
+	KUTF_RESULT_UNKNOWN = -1,
+
+	KUTF_RESULT_PASS    = 0,
+	KUTF_RESULT_DEBUG   = 1,
+	KUTF_RESULT_INFO    = 2,
+	KUTF_RESULT_WARN    = 3,
+	KUTF_RESULT_FAIL    = 4,
+	KUTF_RESULT_FATAL   = 5,
+	KUTF_RESULT_ABORT   = 6,
+
+	KUTF_RESULT_USERDATA      = 7,
+	KUTF_RESULT_USERDATA_WAIT = 8,
+	KUTF_RESULT_TEST_FINISHED = 9
+};
+
+/* The maximum size of a kutf_result_status result when
+ * converted to a string
+ */
+#define KUTF_ERROR_MAX_NAME_SIZE 21
+
+#ifdef __KERNEL__
+
+#include <kutf/kutf_mem.h>
+#include <linux/wait.h>
+
+struct kutf_context;
+
+/**
+ * struct kutf_result - Represents a single test result.
+ * @node:	Next result in the list of results.
+ * @status:	The status summary (pass / warn / fail / etc).
+ * @message:	A more verbose status message.
+ */
+struct kutf_result {
+	struct list_head            node;
+	enum kutf_result_status     status;
+	const char                  *message;
+};
+
+/**
+ * KUTF_RESULT_SET_WAITING_FOR_INPUT - Test is waiting for user data
+ *
+ * This flag is set within a struct kutf_result_set whenever the test is blocked
+ * waiting for user data. Attempts to dequeue results when this flag is set
+ * will cause a dummy %KUTF_RESULT_USERDATA_WAIT result to be produced. This
+ * is used to output a warning message and end of file.
+ */
+#define KUTF_RESULT_SET_WAITING_FOR_INPUT 1
+
+/**
+ * struct kutf_result_set - Represents a set of results.
+ * @results:	List head of a struct kutf_result list for storing the results
+ * @waitq:	Wait queue signalled whenever new results are added.
+ * @flags:	Flags see %KUTF_RESULT_SET_WAITING_FOR_INPUT
+ */
+struct kutf_result_set {
+	struct list_head          results;
+	wait_queue_head_t         waitq;
+	int                       flags;
+};
+
+/**
+ * kutf_create_result_set() - Create a new result set
+ *                            to which results can be added.
+ *
+ * Return: The created result set.
+ */
+struct kutf_result_set *kutf_create_result_set(void);
+
+/**
+ * kutf_add_result() - Add a result to the end of an existing result set.
+ *
+ * @context:	The kutf context
+ * @status:	The result status to add.
+ * @message:	The result message to add.
+ *
+ * Return: 0 if the result is successfully added. -ENOMEM if allocation fails.
+ */
+int kutf_add_result(struct kutf_context *context,
+		enum kutf_result_status status, const char *message);
+
+/**
+ * kutf_remove_result() - Remove a result from the head of a result set.
+ * @set:	The result set.
+ *
+ * This function will block until there is a result to read. The wait is
+ * interruptible, so this function will return with an ERR_PTR if interrupted.
+ *
+ * Return: result or ERR_PTR if interrupted
+ */
+struct kutf_result *kutf_remove_result(
+		struct kutf_result_set *set);
+
+/**
+ * kutf_destroy_result_set() - Free a previously created result set.
+ *
+ * @results:	The result set whose resources to free.
+ */
+void kutf_destroy_result_set(struct kutf_result_set *results);
+
+/**
+ * kutf_set_waiting_for_input() - The test is waiting for userdata
+ *
+ * @set: The result set to update
+ *
+ * Causes the result set to always have results and return a fake
+ * %KUTF_RESULT_USERDATA_WAIT result.
+ */
+void kutf_set_waiting_for_input(struct kutf_result_set *set);
+
+/**
+ * kutf_clear_waiting_for_input() - The test is no longer waiting for userdata
+ *
+ * @set: The result set to update
+ *
+ * Cancels the effect of kutf_set_waiting_for_input()
+ */
+void kutf_clear_waiting_for_input(struct kutf_result_set *set);
+
+#endif	/* __KERNEL__ */
+
+#endif	/* _KERNEL_UTF_RESULTSET_H_ */
diff --git a/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h
new file mode 100644
index 0000000000000..8d75f506f9eb8
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h
@@ -0,0 +1,569 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_SUITE_H_
+#define _KERNEL_UTF_SUITE_H_
+
+/* kutf_suite.h
+ * Functions for management of test suites.
+ *
+ * This collection of data structures, macros, and functions are used to
+ * create Test Suites, Tests within those Test Suites, and Fixture variants
+ * of each test.
+ */
+
+#include <linux/kref.h>
+#include <linux/workqueue.h>
+#include <linux/wait.h>
+
+#include <kutf/kutf_mem.h>
+#include <kutf/kutf_resultset.h>
+
+/* Arbitrary maximum size to prevent user space allocating too much kernel
+ * memory
+ */
+#define KUTF_MAX_LINE_LENGTH (1024u)
+
+/**
+ * Pseudo-flag indicating an absence of any specified test class. Note that
+ * tests should not be annotated with this constant as it is simply a zero
+ * value; tests without a more specific class must be marked with the flag
+ * KUTF_F_TEST_GENERIC.
+ */
+#define KUTF_F_TEST_NONE                ((unsigned int)(0))
+
+/**
+ * Class indicating this test is a smoke test.
+ * A given set of smoke tests should be quick to run, enabling rapid turn-around
+ * of "regress-on-commit" test runs.
+ */
+#define KUTF_F_TEST_SMOKETEST           ((unsigned int)(1 << 1))
+
+/**
+ * Class indicating this test is a performance test.
+ * These tests typically produce a performance metric, such as "time to run" or
+ * "frames per second",
+ */
+#define KUTF_F_TEST_PERFORMANCE         ((unsigned int)(1 << 2))
+
+/**
+ * Class indicating that this test is a deprecated test.
+ * These tests have typically been replaced by an alternative test which is
+ * more efficient, or has better coverage.
+ */
+#define KUTF_F_TEST_DEPRECATED          ((unsigned int)(1 << 3))
+
+/**
+ * Class indicating that this test is a known failure.
+ * These tests have typically been run and failed, but marking them as a known
+ * failure means it is easier to triage results.
+ *
+ * It is typically more convenient to triage known failures using the
+ * results database and web UI, as this means there is no need to modify the
+ * test code.
+ */
+#define KUTF_F_TEST_EXPECTED_FAILURE    ((unsigned int)(1 << 4))
+
+/**
+ * Class indicating that this test is a generic test, which is not a member of
+ * a more specific test class. Tests which are not created with a specific set
+ * of filter flags by the user are assigned this test class by default.
+ */
+#define KUTF_F_TEST_GENERIC             ((unsigned int)(1 << 5))
+
+/**
+ * Class indicating this test is a resource allocation failure test.
+ * A resource allocation failure test will test that an error code is
+ * correctly propagated when an allocation fails.
+ */
+#define KUTF_F_TEST_RESFAIL             ((unsigned int)(1 << 6))
+
+/**
+ * Additional flag indicating that this test is an expected failure when
+ * run in resource failure mode. These tests are never run when running
+ * the low resource mode.
+ */
+#define KUTF_F_TEST_EXPECTED_FAILURE_RF ((unsigned int)(1 << 7))
+
+/**
+ * Flag reserved for user-defined filter zero.
+ */
+#define KUTF_F_TEST_USER_0 ((unsigned int)(1 << 24))
+
+/**
+ * Flag reserved for user-defined filter one.
+ */
+#define KUTF_F_TEST_USER_1 ((unsigned int)(1 << 25))
+
+/**
+ * Flag reserved for user-defined filter two.
+ */
+#define KUTF_F_TEST_USER_2 ((unsigned int)(1 << 26))
+
+/**
+ * Flag reserved for user-defined filter three.
+ */
+#define KUTF_F_TEST_USER_3 ((unsigned int)(1 << 27))
+
+/**
+ * Flag reserved for user-defined filter four.
+ */
+#define KUTF_F_TEST_USER_4 ((unsigned int)(1 << 28))
+
+/**
+ * Flag reserved for user-defined filter five.
+ */
+#define KUTF_F_TEST_USER_5 ((unsigned int)(1 << 29))
+
+/**
+ * Flag reserved for user-defined filter six.
+ */
+#define KUTF_F_TEST_USER_6 ((unsigned int)(1 << 30))
+
+/**
+ * Flag reserved for user-defined filter seven.
+ */
+#define KUTF_F_TEST_USER_7 ((unsigned int)(1 << 31))
+
+/**
+ * Pseudo-flag indicating that all test classes should be executed.
+ */
+#define KUTF_F_TEST_ALL                 ((unsigned int)(0xFFFFFFFFU))
+
+/**
+ * union kutf_callback_data - Union used to store test callback data
+ * @ptr_value:		pointer to the location where test callback data
+ *                      are stored
+ * @u32_value:		a number which represents test callback data
+ */
+union kutf_callback_data {
+	void *ptr_value;
+	u32  u32_value;
+};
+
+/**
+ * struct kutf_userdata_line - A line of user data to be returned to the user
+ * @node:   struct list_head to link this into a list
+ * @str:    The line of user data to return to user space
+ * @size:   The number of bytes within @str
+ */
+struct kutf_userdata_line {
+	struct list_head node;
+	char *str;
+	size_t size;
+};
+
+/**
+ * KUTF_USERDATA_WARNING_OUTPUT - Flag specifying that a warning has been output
+ *
+ * If user space reads the "run" file while the test is waiting for user data,
+ * then the framework will output a warning message and set this flag within
+ * struct kutf_userdata. A subsequent read will then simply return an end of
+ * file condition rather than outputting the warning again. The upshot of this
+ * is that simply running 'cat' on a test which requires user data will produce
+ * the warning followed by 'cat' exiting due to EOF - which is much more user
+ * friendly than blocking indefinitely waiting for user data.
+ */
+#define KUTF_USERDATA_WARNING_OUTPUT  1
+
+/**
+ * struct kutf_userdata - Structure holding user data
+ * @flags:       See %KUTF_USERDATA_WARNING_OUTPUT
+ * @input_head:  List of struct kutf_userdata_line containing user data
+ *               to be read by the kernel space test.
+ * @input_waitq: Wait queue signalled when there is new user data to be
+ *               read by the kernel space test.
+ */
+struct kutf_userdata {
+	unsigned long flags;
+	struct list_head input_head;
+	wait_queue_head_t input_waitq;
+};
+
+/**
+ * struct kutf_context - Structure representing a kernel test context
+ * @kref:		Refcount for number of users of this context
+ * @suite:		Convenience pointer to the suite this context
+ *                      is running
+ * @test_fix:		The fixture that is being run in this context
+ * @fixture_pool:	The memory pool used for the duration of
+ *                      the fixture/text context.
+ * @fixture:		The user provided fixture structure.
+ * @fixture_index:	The index (id) of the current fixture.
+ * @fixture_name:	The name of the current fixture (or NULL if unnamed).
+ * @test_data:		Any user private data associated with this test
+ * @result_set:		All the results logged by this test context
+ * @status:		The status of the currently running fixture.
+ * @expected_status:	The expected status on exist of the currently
+ *                      running fixture.
+ * @work:		Work item to enqueue onto the work queue to run the test
+ * @userdata:		Structure containing the user data for the test to read
+ */
+struct kutf_context {
+	struct kref                     kref;
+	struct kutf_suite               *suite;
+	struct kutf_test_fixture        *test_fix;
+	struct kutf_mempool             fixture_pool;
+	void                            *fixture;
+	unsigned int                    fixture_index;
+	const char                      *fixture_name;
+	union kutf_callback_data        test_data;
+	struct kutf_result_set          *result_set;
+	enum kutf_result_status         status;
+	enum kutf_result_status         expected_status;
+
+	struct work_struct              work;
+	struct kutf_userdata            userdata;
+};
+
+/**
+ * struct kutf_suite - Structure representing a kernel test suite
+ * @app:			The application this suite belongs to.
+ * @name:			The name of this suite.
+ * @suite_data:			Any user private data associated with this
+ *                              suite.
+ * @create_fixture:		Function used to create a new fixture instance
+ * @remove_fixture:		Function used to destroy a new fixture instance
+ * @fixture_variants:		The number of variants (must be at least 1).
+ * @suite_default_flags:	Suite global filter flags which are set on
+ *                              all tests.
+ * @node:			List node for suite_list
+ * @dir:			The debugfs directory for this suite
+ * @test_list:			List head to store all the tests which are
+ *                              part of this suite
+ */
+struct kutf_suite {
+	struct kutf_application        *app;
+	const char                     *name;
+	union kutf_callback_data       suite_data;
+	void *(*create_fixture)(struct kutf_context *context);
+	void  (*remove_fixture)(struct kutf_context *context);
+	unsigned int                   fixture_variants;
+	unsigned int                   suite_default_flags;
+	struct list_head               node;
+	struct dentry                  *dir;
+	struct list_head               test_list;
+};
+
+/* ============================================================================
+	Application functions
+============================================================================ */
+
+/**
+ * kutf_create_application() - Create an in kernel test application.
+ * @name:	The name of the test application.
+ *
+ * Return: pointer to the kutf_application  on success or NULL
+ * on failure
+ */
+struct kutf_application *kutf_create_application(const char *name);
+
+/**
+ * kutf_destroy_application() - Destroy an in kernel test application.
+ *
+ * @app:	The test application to destroy.
+ */
+void kutf_destroy_application(struct kutf_application *app);
+
+/* ============================================================================
+	Suite functions
+============================================================================ */
+
+/**
+ * kutf_create_suite() - Create a kernel test suite.
+ * @app:		The test application to create the suite in.
+ * @name:		The name of the suite.
+ * @fixture_count:	The number of fixtures to run over the test
+ *                      functions in this suite
+ * @create_fixture:	Callback used to create a fixture. The returned value
+ *                      is stored in the fixture pointer in the context for
+ *                      use in the test functions.
+ * @remove_fixture:	Callback used to remove a previously created fixture.
+ *
+ * Suite names must be unique. Should two suites with the same name be
+ * registered with the same application then this function will fail, if they
+ * are registered with different applications then the function will not detect
+ * this and the call will succeed.
+ *
+ * Return: pointer to the created kutf_suite on success or NULL
+ * on failure
+ */
+struct kutf_suite *kutf_create_suite(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context));
+
+/**
+ * kutf_create_suite_with_filters() - Create a kernel test suite with user
+ *                                    defined default filters.
+ * @app:		The test application to create the suite in.
+ * @name:		The name of the suite.
+ * @fixture_count:	The number of fixtures to run over the test
+ *                      functions in this suite
+ * @create_fixture:	Callback used to create a fixture. The returned value
+ *			is stored in the fixture pointer in the context for
+ *			use in the test functions.
+ * @remove_fixture:	Callback used to remove a previously created fixture.
+ * @filters:		Filters to apply to a test if it doesn't provide its own
+ *
+ * Suite names must be unique. Should two suites with the same name be
+ * registered with the same application then this function will fail, if they
+ * are registered with different applications then the function will not detect
+ * this and the call will succeed.
+ *
+ * Return: pointer to the created kutf_suite on success or NULL on failure
+ */
+struct kutf_suite *kutf_create_suite_with_filters(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters);
+
+/**
+ * kutf_create_suite_with_filters_and_data() - Create a kernel test suite with
+ *                                             user defined default filters.
+ * @app:		The test application to create the suite in.
+ * @name:		The name of the suite.
+ * @fixture_count:	The number of fixtures to run over the test
+ *			functions in this suite
+ * @create_fixture:	Callback used to create a fixture. The returned value
+ *			is stored in the fixture pointer in the context for
+ *			use in the test functions.
+ * @remove_fixture:	Callback used to remove a previously created fixture.
+ * @filters:		Filters to apply to a test if it doesn't provide its own
+ * @suite_data:		Suite specific callback data, provided during the
+ *			running of the test in the kutf_context
+ *
+ * Return: pointer to the created kutf_suite on success or NULL
+ * on failure
+ */
+struct kutf_suite *kutf_create_suite_with_filters_and_data(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data suite_data);
+
+/**
+ * kutf_add_test() - Add a test to a kernel test suite.
+ * @suite:	The suite to add the test to.
+ * @id:		The ID of the test.
+ * @name:	The name of the test.
+ * @execute:	Callback to the test function to run.
+ *
+ * Note: As no filters are provided the test will use the suite filters instead
+ */
+void kutf_add_test(struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context));
+
+/**
+ * kutf_add_test_with_filters() - Add a test to a kernel test suite with filters
+ * @suite:	The suite to add the test to.
+ * @id:		The ID of the test.
+ * @name:	The name of the test.
+ * @execute:	Callback to the test function to run.
+ * @filters:	A set of filtering flags, assigning test categories.
+ */
+void kutf_add_test_with_filters(struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters);
+
+/**
+ * kutf_add_test_with_filters_and_data() - Add a test to a kernel test suite
+ *					   with filters.
+ * @suite:	The suite to add the test to.
+ * @id:		The ID of the test.
+ * @name:	The name of the test.
+ * @execute:	Callback to the test function to run.
+ * @filters:	A set of filtering flags, assigning test categories.
+ * @test_data:	Test specific callback data, provided during the
+ *		running of the test in the kutf_context
+ */
+void kutf_add_test_with_filters_and_data(
+		struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data test_data);
+
+
+/* ============================================================================
+	Test functions
+============================================================================ */
+/**
+ * kutf_test_log_result_external() - Log a result which has been created
+ *                                   externally into a in a standard form
+ *                                   recognized by the log parser.
+ * @context:	The test context the test is running in
+ * @message:	The message for this result
+ * @new_status:	The result status of this log message
+ */
+void kutf_test_log_result_external(
+	struct kutf_context *context,
+	const char *message,
+	enum kutf_result_status new_status);
+
+/**
+ * kutf_test_expect_abort() - Tell the kernel that you expect the current
+ *                            fixture to produce an abort.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_abort(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_fatal() - Tell the kernel that you expect the current
+ *                            fixture to produce a fatal error.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_fatal(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_fail() - Tell the kernel that you expect the current
+ *                           fixture to fail.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_fail(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_warn() - Tell the kernel that you expect the current
+ *                           fixture to produce a warning.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_warn(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_pass() - Tell the kernel that you expect the current
+ *                           fixture to pass.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_expect_pass(struct kutf_context *context);
+
+/**
+ * kutf_test_skip() - Tell the kernel that the test should be skipped.
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_skip(struct kutf_context *context);
+
+/**
+ * kutf_test_skip_msg() - Tell the kernel that this test has been skipped,
+ *                        supplying a reason string.
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the reason for the skip.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a prebaked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_skip_msg(struct kutf_context *context, const char *message);
+
+/**
+ * kutf_test_pass() - Tell the kernel that this test has passed.
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the reason for the pass.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_pass(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_debug() - Send a debug message
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the debug information.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_debug(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_info() - Send an information message
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the information message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_info(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_warn() - Send a warning message
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the warning message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_warn(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_fail() - Tell the kernel that a test has failed
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the failure message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_fail(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_fatal() - Tell the kernel that a test has triggered a fatal error
+ * @context:	The test context this test is running in.
+ * @message:	A message string containing the fatal error message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer who's lifetime is the same as the test run.
+ */
+void kutf_test_fatal(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_abort() - Tell the kernel that a test triggered an abort in the test
+ *
+ * @context:	The test context this test is running in.
+ */
+void kutf_test_abort(struct kutf_context *context);
+
+#endif	/* _KERNEL_UTF_SUITE_H_ */
diff --git a/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h
new file mode 100644
index 0000000000000..25b8285500d7d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h
@@ -0,0 +1,60 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_UTILS_H_
+#define _KERNEL_UTF_UTILS_H_
+
+/* kutf_utils.h
+ * Utilities for the kernel UTF test infrastructure.
+ *
+ * This collection of library functions are provided for use by kernel UTF
+ * and users of kernel UTF which don't directly fit within the other
+ * code modules.
+ */
+
+#include <kutf/kutf_mem.h>
+
+/**
+ * Maximum size of the message strings within kernel UTF, messages longer then
+ * this will be truncated.
+ */
+#define KUTF_MAX_DSPRINTF_LEN	1024
+
+/**
+ * kutf_dsprintf() - dynamic sprintf
+ * @pool:	memory pool to allocate from
+ * @fmt:	The format string describing the string to document.
+ * @...		The parameters to feed in to the format string.
+ *
+ * This function implements sprintf which dynamically allocates memory to store
+ * the string. The library will free the memory containing the string when the
+ * result set is cleared or destroyed.
+ *
+ * Note The returned string may be truncated to fit an internal temporary
+ * buffer, which is KUTF_MAX_DSPRINTF_LEN bytes in length.
+ *
+ * Return: Returns pointer to allocated string, or NULL on error.
+ */
+const char *kutf_dsprintf(struct kutf_mempool *pool,
+		const char *fmt, ...);
+
+#endif	/* _KERNEL_UTF_UTILS_H_ */
diff --git a/drivers/gpu/arm/midgard/tests/kutf/Kbuild b/drivers/gpu/arm/midgard/tests/kutf/Kbuild
new file mode 100644
index 0000000000000..2531d41ca28d6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/kutf/Kbuild
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+ccflags-y += -I$(src)/../include
+
+obj-$(CONFIG_MALI_KUTF) += kutf.o
+
+kutf-y := kutf_mem.o kutf_resultset.o kutf_suite.o kutf_utils.o kutf_helpers.o kutf_helpers_user.o
diff --git a/drivers/gpu/arm/midgard/tests/kutf/Kconfig b/drivers/gpu/arm/midgard/tests/kutf/Kconfig
new file mode 100644
index 0000000000000..0cdb474c06a3a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/kutf/Kconfig
@@ -0,0 +1,28 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+config MALI_KUTF
+ tristate "Mali Kernel Unit Test Framework"
+ default m
+ help
+   Enables MALI testing framework. To compile it as a module,
+   choose M here - this will generate a single module called kutf.
diff --git a/drivers/gpu/arm/midgard/tests/kutf/Makefile b/drivers/gpu/arm/midgard/tests/kutf/Makefile
new file mode 100644
index 0000000000000..d848e8774bd08
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/kutf/Makefile
@@ -0,0 +1,35 @@
+#
+# (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS=-I$(CURDIR)/../include modules
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/drivers/gpu/arm/midgard/tests/kutf/build.bp b/drivers/gpu/arm/midgard/tests/kutf/build.bp
new file mode 100644
index 0000000000000..960c8faa8df95
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/kutf/build.bp
@@ -0,0 +1,31 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2018 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+bob_kernel_module {
+    name: "kutf",
+    defaults: ["kernel_defaults"],
+    srcs: [
+        "Kbuild",
+        "kutf_helpers.c",
+        "kutf_helpers_user.c",
+        "kutf_mem.c",
+        "kutf_resultset.c",
+        "kutf_suite.c",
+        "kutf_utils.c",
+    ],
+    kbuild_options: ["CONFIG_MALI_KUTF=m"],
+    include_dirs: ["kernel/drivers/gpu/arm/midgard/tests/include"],
+    enabled: false,
+    base_build_kutf: {
+        enabled: true,
+    },
+}
diff --git a/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c b/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c
new file mode 100644
index 0000000000000..cab5add6d93c6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c
@@ -0,0 +1,129 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Kernel UTF test helpers */
+#include <kutf/kutf_helpers.h>
+
+#include <linux/err.h>
+#include <linux/jiffies.h>
+#include <linux/sched.h>
+#include <linux/preempt.h>
+#include <linux/wait.h>
+#include <linux/uaccess.h>
+
+static DEFINE_SPINLOCK(kutf_input_lock);
+
+static bool pending_input(struct kutf_context *context)
+{
+	bool input_pending;
+
+	spin_lock(&kutf_input_lock);
+
+	input_pending = !list_empty(&context->userdata.input_head);
+
+	spin_unlock(&kutf_input_lock);
+
+	return input_pending;
+}
+
+char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size)
+{
+	struct kutf_userdata_line *line;
+
+	spin_lock(&kutf_input_lock);
+
+	while (list_empty(&context->userdata.input_head)) {
+		int err;
+
+		kutf_set_waiting_for_input(context->result_set);
+
+		spin_unlock(&kutf_input_lock);
+
+		err = wait_event_interruptible(context->userdata.input_waitq,
+				pending_input(context));
+
+		if (err)
+			return ERR_PTR(-EINTR);
+
+		spin_lock(&kutf_input_lock);
+	}
+
+	line = list_first_entry(&context->userdata.input_head,
+			struct kutf_userdata_line, node);
+	if (line->str) {
+		/*
+		 * Unless it is the end-of-input marker,
+		 * remove it from the list
+		 */
+		list_del(&line->node);
+	}
+
+	spin_unlock(&kutf_input_lock);
+
+	if (str_size)
+		*str_size = line->size;
+	return line->str;
+}
+
+int kutf_helper_input_enqueue(struct kutf_context *context,
+		const char __user *str, size_t size)
+{
+	struct kutf_userdata_line *line;
+
+	line = kutf_mempool_alloc(&context->fixture_pool,
+			sizeof(*line) + size + 1);
+	if (!line)
+		return -ENOMEM;
+	if (str) {
+		unsigned long bytes_not_copied;
+
+		line->size = size;
+		line->str = (void *)(line + 1);
+		bytes_not_copied = copy_from_user(line->str, str, size);
+		if (bytes_not_copied != 0)
+			return -EFAULT;
+		/* Zero terminate the string */
+		line->str[size] = '\0';
+	} else {
+		/* This is used to mark the end of input */
+		WARN_ON(size);
+		line->size = 0;
+		line->str = NULL;
+	}
+
+	spin_lock(&kutf_input_lock);
+
+	list_add_tail(&line->node, &context->userdata.input_head);
+
+	kutf_clear_waiting_for_input(context->result_set);
+
+	spin_unlock(&kutf_input_lock);
+
+	wake_up(&context->userdata.input_waitq);
+
+	return 0;
+}
+
+void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context)
+{
+	kutf_helper_input_enqueue(context, NULL, 0);
+}
diff --git a/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c b/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c
new file mode 100644
index 0000000000000..108fa82d9b216
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c
@@ -0,0 +1,468 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Kernel UTF test helpers that mirror those for kutf-userside */
+#include <kutf/kutf_helpers_user.h>
+#include <kutf/kutf_helpers.h>
+#include <kutf/kutf_utils.h>
+
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+const char *valtype_names[] = {
+	"INVALID",
+	"U64",
+	"STR",
+};
+
+static const char *get_val_type_name(enum kutf_helper_valtype valtype)
+{
+	/* enums can be signed or unsigned (implementation dependant), so
+	 * enforce it to prevent:
+	 * a) "<0 comparison on unsigned type" warning - if we did both upper
+	 *    and lower bound check
+	 * b) incorrect range checking if it was a signed type - if we did
+	 *    upper bound check only */
+	unsigned int type_idx = (unsigned int)valtype;
+
+	if (type_idx >= (unsigned int)KUTF_HELPER_VALTYPE_COUNT)
+		type_idx = (unsigned int)KUTF_HELPER_VALTYPE_INVALID;
+
+	return valtype_names[type_idx];
+}
+
+/* Check up to str_len chars of val_str to see if it's a valid value name:
+ *
+ * - Has between 1 and KUTF_HELPER_MAX_VAL_NAME_LEN characters before the \0 terminator
+ * - And, each char is in the character set [A-Z0-9_] */
+static int validate_val_name(const char *val_str, int str_len)
+{
+	int i = 0;
+
+	for (i = 0; str_len && i <= KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0'; ++i, --str_len) {
+		char val_chr = val_str[i];
+
+		if (val_chr >= 'A' && val_chr <= 'Z')
+			continue;
+		if (val_chr >= '0' && val_chr <= '9')
+			continue;
+		if (val_chr == '_')
+			continue;
+
+		/* Character not in the set [A-Z0-9_] - report error */
+		return 1;
+	}
+
+	/* Names of 0 length are not valid */
+	if (i == 0)
+		return 1;
+	/* Length greater than KUTF_HELPER_MAX_VAL_NAME_LEN not allowed */
+	if (i > KUTF_HELPER_MAX_VAL_NAME_LEN || (i == KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0'))
+		return 1;
+
+	return 0;
+}
+
+/* Find the length of the valid part of the string when it will be in quotes
+ * e.g. "str"
+ *
+ * That is, before any '\\', '\n' or '"' characters. This is so we don't have
+ * to escape the string */
+static int find_quoted_string_valid_len(const char *str)
+{
+	char *ptr;
+	const char *check_chars = "\\\n\"";
+
+	ptr = strpbrk(str, check_chars);
+	if (ptr)
+		return (int)(ptr-str);
+
+	return (int)strlen(str);
+}
+
+static int kutf_helper_userdata_enqueue(struct kutf_context *context,
+		const char *str)
+{
+	char *str_copy;
+	size_t len;
+	int err;
+
+	len = strlen(str)+1;
+
+	str_copy = kutf_mempool_alloc(&context->fixture_pool, len);
+	if (!str_copy)
+		return -ENOMEM;
+
+	strcpy(str_copy, str);
+
+	err = kutf_add_result(context, KUTF_RESULT_USERDATA, str_copy);
+
+	return err;
+}
+
+#define MAX_U64_HEX_LEN 16
+/* (Name size) + ("=0x" size) + (64-bit hex value size) + (terminator) */
+#define NAMED_U64_VAL_BUF_SZ (KUTF_HELPER_MAX_VAL_NAME_LEN + 3 + MAX_U64_HEX_LEN + 1)
+
+int kutf_helper_send_named_u64(struct kutf_context *context,
+		const char *val_name, u64 val)
+{
+	int ret = 1;
+	char msgbuf[NAMED_U64_VAL_BUF_SZ];
+	const char *errmsg = NULL;
+
+	if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': Invalid value name", val_name);
+		goto out_err;
+	}
+
+	ret = snprintf(msgbuf, NAMED_U64_VAL_BUF_SZ, "%s=0x%llx", val_name, val);
+	if (ret >= NAMED_U64_VAL_BUF_SZ || ret < 0) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': snprintf() problem buffer size==%d ret=%d",
+				val_name, NAMED_U64_VAL_BUF_SZ, ret);
+		goto out_err;
+	}
+
+	ret = kutf_helper_userdata_enqueue(context, msgbuf);
+	if (ret) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': send returned %d",
+				val_name, ret);
+		goto out_err;
+	}
+
+	return ret;
+out_err:
+	kutf_test_fail(context, errmsg);
+	return ret;
+}
+EXPORT_SYMBOL(kutf_helper_send_named_u64);
+
+#define NAMED_VALUE_SEP "="
+#define NAMED_STR_START_DELIM NAMED_VALUE_SEP "\""
+#define NAMED_STR_END_DELIM "\""
+
+int kutf_helper_max_str_len_for_kern(const char *val_name,
+		int kern_buf_sz)
+{
+	const int val_name_len = strlen(val_name);
+	const int start_delim_len = strlen(NAMED_STR_START_DELIM);
+	const int end_delim_len = strlen(NAMED_STR_END_DELIM);
+	int max_msg_len = kern_buf_sz;
+	int max_str_len;
+
+	max_str_len = max_msg_len - val_name_len - start_delim_len -
+		end_delim_len;
+
+	return max_str_len;
+}
+EXPORT_SYMBOL(kutf_helper_max_str_len_for_kern);
+
+int kutf_helper_send_named_str(struct kutf_context *context,
+		const char *val_name,
+		const char *val_str)
+{
+	int val_str_len;
+	int str_buf_sz;
+	char *str_buf = NULL;
+	int ret = 1;
+	char *copy_ptr;
+	int val_name_len;
+	int start_delim_len = strlen(NAMED_STR_START_DELIM);
+	int end_delim_len = strlen(NAMED_STR_END_DELIM);
+	const char *errmsg = NULL;
+
+	if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send u64 value named '%s': Invalid value name", val_name);
+		goto out_err;
+	}
+	val_name_len = strlen(val_name);
+
+	val_str_len = find_quoted_string_valid_len(val_str);
+
+	/* (name length) + ("=\"" length) + (val_str len) + ("\"" length) + terminator */
+	str_buf_sz = val_name_len + start_delim_len + val_str_len + end_delim_len + 1;
+
+	/* Using kmalloc() here instead of mempool since we know we need to free
+	 * before we return */
+	str_buf = kmalloc(str_buf_sz, GFP_KERNEL);
+	if (!str_buf) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send str value named '%s': kmalloc failed, str_buf_sz=%d",
+				val_name, str_buf_sz);
+		goto out_err;
+	}
+	copy_ptr = str_buf;
+
+	/* Manually copy each string component instead of snprintf because
+	 * val_str may need to end early, and less error path handling */
+
+	/* name */
+	memcpy(copy_ptr, val_name, val_name_len);
+	copy_ptr += val_name_len;
+
+	/* str start delimiter */
+	memcpy(copy_ptr, NAMED_STR_START_DELIM, start_delim_len);
+	copy_ptr += start_delim_len;
+
+	/* str value */
+	memcpy(copy_ptr, val_str, val_str_len);
+	copy_ptr += val_str_len;
+
+	/* str end delimiter */
+	memcpy(copy_ptr, NAMED_STR_END_DELIM, end_delim_len);
+	copy_ptr += end_delim_len;
+
+	/* Terminator */
+	*copy_ptr = '\0';
+
+	ret = kutf_helper_userdata_enqueue(context, str_buf);
+
+	if (ret) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to send str value named '%s': send returned %d",
+				val_name, ret);
+		goto out_err;
+	}
+
+	kfree(str_buf);
+	return ret;
+
+out_err:
+	kutf_test_fail(context, errmsg);
+	kfree(str_buf);
+	return ret;
+}
+EXPORT_SYMBOL(kutf_helper_send_named_str);
+
+int kutf_helper_receive_named_val(
+		struct kutf_context *context,
+		struct kutf_helper_named_val *named_val)
+{
+	size_t recv_sz;
+	char *recv_str;
+	char *search_ptr;
+	char *name_str = NULL;
+	int name_len;
+	int strval_len;
+	enum kutf_helper_valtype type = KUTF_HELPER_VALTYPE_INVALID;
+	char *strval = NULL;
+	u64 u64val = 0;
+	int err = KUTF_HELPER_ERR_INVALID_VALUE;
+
+	recv_str = kutf_helper_input_dequeue(context, &recv_sz);
+	if (!recv_str)
+		return -EBUSY;
+	else if (IS_ERR(recv_str))
+		return PTR_ERR(recv_str);
+
+	/* Find the '=', grab the name and validate it */
+	search_ptr = strnchr(recv_str, recv_sz, NAMED_VALUE_SEP[0]);
+	if (search_ptr) {
+		name_len = search_ptr - recv_str;
+		if (!validate_val_name(recv_str, name_len)) {
+			/* no need to reallocate - just modify string in place */
+			name_str = recv_str;
+			name_str[name_len] = '\0';
+
+			/* Move until after the '=' */
+			recv_str += (name_len + 1);
+			recv_sz -= (name_len + 1);
+		}
+	}
+	if (!name_str) {
+		pr_err("Invalid name part for received string '%s'\n",
+				recv_str);
+		return KUTF_HELPER_ERR_INVALID_NAME;
+	}
+
+	/* detect value type */
+	if (*recv_str == NAMED_STR_START_DELIM[1]) {
+		/* string delimiter start*/
+		++recv_str;
+		--recv_sz;
+
+		/* Find end of string */
+		search_ptr = strnchr(recv_str, recv_sz, NAMED_STR_END_DELIM[0]);
+		if (search_ptr) {
+			strval_len = search_ptr - recv_str;
+			/* Validate the string to ensure it contains no quotes */
+			if (strval_len == find_quoted_string_valid_len(recv_str)) {
+				/* no need to reallocate - just modify string in place */
+				strval = recv_str;
+				strval[strval_len] = '\0';
+
+				/* Move until after the end delimiter */
+				recv_str += (strval_len + 1);
+				recv_sz -= (strval_len + 1);
+				type = KUTF_HELPER_VALTYPE_STR;
+			} else {
+				pr_err("String value contains invalid characters in rest of received string '%s'\n", recv_str);
+				err = KUTF_HELPER_ERR_CHARS_AFTER_VAL;
+			}
+		} else {
+			pr_err("End of string delimiter not found in rest of received string '%s'\n", recv_str);
+			err = KUTF_HELPER_ERR_NO_END_DELIMITER;
+		}
+	} else {
+		/* possibly a number value - strtoull will parse it */
+		err = kstrtoull(recv_str, 0, &u64val);
+		/* unlike userspace can't get an end ptr, but if kstrtoull()
+		 * reads characters after the number it'll report -EINVAL */
+		if (!err) {
+			int len_remain = strnlen(recv_str, recv_sz);
+
+			type = KUTF_HELPER_VALTYPE_U64;
+			recv_str += len_remain;
+			recv_sz -= len_remain;
+		} else {
+			/* special case: not a number, report as such */
+			pr_err("Rest of received string was not a numeric value or quoted string value: '%s'\n", recv_str);
+		}
+	}
+
+	if (type == KUTF_HELPER_VALTYPE_INVALID)
+		return err;
+
+	/* Any remaining characters - error */
+	if (strnlen(recv_str, recv_sz) != 0) {
+		pr_err("Characters remain after value of type %s: '%s'\n",
+				get_val_type_name(type), recv_str);
+		return KUTF_HELPER_ERR_CHARS_AFTER_VAL;
+	}
+
+	/* Success - write into the output structure */
+	switch (type) {
+	case KUTF_HELPER_VALTYPE_U64:
+		named_val->u.val_u64 = u64val;
+		break;
+	case KUTF_HELPER_VALTYPE_STR:
+		named_val->u.val_str = strval;
+		break;
+	default:
+		pr_err("Unreachable, fix kutf_helper_receive_named_val\n");
+		/* Coding error, report as though 'run' file failed */
+		return -EINVAL;
+	}
+
+	named_val->val_name = name_str;
+	named_val->type = type;
+
+	return KUTF_HELPER_ERR_NONE;
+}
+EXPORT_SYMBOL(kutf_helper_receive_named_val);
+
+#define DUMMY_MSG "<placeholder due to test fail>"
+int kutf_helper_receive_check_val(
+		struct kutf_helper_named_val *named_val,
+		struct kutf_context *context,
+		const char *expect_val_name,
+		enum kutf_helper_valtype expect_val_type)
+{
+	int err;
+
+	err = kutf_helper_receive_named_val(context, named_val);
+	if (err < 0) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Failed to receive value named '%s'",
+				expect_val_name);
+		kutf_test_fail(context, msg);
+		return err;
+	} else if (err > 0) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Named-value parse error when expecting value named '%s'",
+				expect_val_name);
+		kutf_test_fail(context, msg);
+		goto out_fail_and_fixup;
+	}
+
+	if (strcmp(named_val->val_name, expect_val_name) != 0) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Expecting to receive value named '%s' but got '%s'",
+				expect_val_name, named_val->val_name);
+		kutf_test_fail(context, msg);
+		goto out_fail_and_fixup;
+	}
+
+
+	if (named_val->type != expect_val_type) {
+		const char *msg = kutf_dsprintf(&context->fixture_pool,
+				"Expecting value named '%s' to be of type %s but got %s",
+				expect_val_name, get_val_type_name(expect_val_type),
+				get_val_type_name(named_val->type));
+		kutf_test_fail(context, msg);
+		goto out_fail_and_fixup;
+	}
+
+	return err;
+
+out_fail_and_fixup:
+	/* Produce a valid but incorrect value */
+	switch (expect_val_type) {
+	case KUTF_HELPER_VALTYPE_U64:
+		named_val->u.val_u64 = 0ull;
+		break;
+	case KUTF_HELPER_VALTYPE_STR:
+		{
+			char *str = kutf_mempool_alloc(&context->fixture_pool, sizeof(DUMMY_MSG));
+
+			if (!str)
+				return -1;
+
+			strcpy(str, DUMMY_MSG);
+			named_val->u.val_str = str;
+			break;
+		}
+	default:
+		break;
+	}
+
+	/* Indicate that this is invalid */
+	named_val->type = KUTF_HELPER_VALTYPE_INVALID;
+
+	/* But at least allow the caller to continue in the test with failures */
+	return 0;
+}
+EXPORT_SYMBOL(kutf_helper_receive_check_val);
+
+void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val)
+{
+	switch (named_val->type) {
+	case KUTF_HELPER_VALTYPE_U64:
+		pr_warn("%s=0x%llx\n", named_val->val_name, named_val->u.val_u64);
+		break;
+	case KUTF_HELPER_VALTYPE_STR:
+		pr_warn("%s=\"%s\"\n", named_val->val_name, named_val->u.val_str);
+		break;
+	case KUTF_HELPER_VALTYPE_INVALID:
+		pr_warn("%s is invalid\n", named_val->val_name);
+		break;
+	default:
+		pr_warn("%s has unknown type %d\n", named_val->val_name, named_val->type);
+		break;
+	}
+}
+EXPORT_SYMBOL(kutf_helper_output_named_val);
diff --git a/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c b/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c
new file mode 100644
index 0000000000000..fd98beaeb84a9
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c
@@ -0,0 +1,108 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Kernel UTF memory management functions */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include <kutf/kutf_mem.h>
+
+
+/**
+ * struct kutf_alloc_entry - Structure representing an allocation.
+ * @node:	List node for use with kutf_mempool.
+ * @data:	Data area of the allocation
+ */
+struct kutf_alloc_entry {
+	struct list_head node;
+	u8 data[0];
+};
+
+int kutf_mempool_init(struct kutf_mempool *pool)
+{
+	if (!pool) {
+		pr_err("NULL pointer passed to %s\n", __func__);
+		return -1;
+	}
+
+	INIT_LIST_HEAD(&pool->head);
+	mutex_init(&pool->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(kutf_mempool_init);
+
+void kutf_mempool_destroy(struct kutf_mempool *pool)
+{
+	struct list_head *remove;
+	struct list_head *tmp;
+
+	if (!pool) {
+		pr_err("NULL pointer passed to %s\n", __func__);
+		return;
+	}
+
+	mutex_lock(&pool->lock);
+	list_for_each_safe(remove, tmp, &pool->head) {
+		struct kutf_alloc_entry *remove_alloc;
+
+		remove_alloc = list_entry(remove, struct kutf_alloc_entry, node);
+		list_del(&remove_alloc->node);
+		kfree(remove_alloc);
+	}
+	mutex_unlock(&pool->lock);
+
+}
+EXPORT_SYMBOL(kutf_mempool_destroy);
+
+void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size)
+{
+	struct kutf_alloc_entry *ret;
+
+	if (!pool) {
+		pr_err("NULL pointer passed to %s\n", __func__);
+		goto fail_pool;
+	}
+
+	mutex_lock(&pool->lock);
+
+	ret = kmalloc(sizeof(*ret) + size, GFP_KERNEL);
+	if (!ret) {
+		pr_err("Failed to allocate memory\n");
+		goto fail_alloc;
+	}
+
+	INIT_LIST_HEAD(&ret->node);
+	list_add(&ret->node, &pool->head);
+
+	mutex_unlock(&pool->lock);
+
+	return &ret->data[0];
+
+fail_alloc:
+	mutex_unlock(&pool->lock);
+fail_pool:
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_mempool_alloc);
diff --git a/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c b/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c
new file mode 100644
index 0000000000000..94ecfa4421e18
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Kernel UTF result management functions */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/err.h>
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_resultset.h>
+
+/* Lock to protect all result structures */
+static DEFINE_SPINLOCK(kutf_result_lock);
+
+struct kutf_result_set *kutf_create_result_set(void)
+{
+	struct kutf_result_set *set;
+
+	set = kmalloc(sizeof(*set), GFP_KERNEL);
+	if (!set) {
+		pr_err("Failed to allocate resultset");
+		goto fail_alloc;
+	}
+
+	INIT_LIST_HEAD(&set->results);
+	init_waitqueue_head(&set->waitq);
+	set->flags = 0;
+
+	return set;
+
+fail_alloc:
+	return NULL;
+}
+
+int kutf_add_result(struct kutf_context *context,
+		enum kutf_result_status status,
+		const char *message)
+{
+	struct kutf_mempool *mempool = &context->fixture_pool;
+	struct kutf_result_set *set = context->result_set;
+	/* Create the new result */
+	struct kutf_result *new_result;
+
+	BUG_ON(set == NULL);
+
+	new_result = kutf_mempool_alloc(mempool, sizeof(*new_result));
+	if (!new_result) {
+		pr_err("Result allocation failed\n");
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&new_result->node);
+	new_result->status = status;
+	new_result->message = message;
+
+	spin_lock(&kutf_result_lock);
+
+	list_add_tail(&new_result->node, &set->results);
+
+	spin_unlock(&kutf_result_lock);
+
+	wake_up(&set->waitq);
+
+	return 0;
+}
+
+void kutf_destroy_result_set(struct kutf_result_set *set)
+{
+	if (!list_empty(&set->results))
+		pr_err("kutf_destroy_result_set: Unread results from test\n");
+
+	kfree(set);
+}
+
+static bool kutf_has_result(struct kutf_result_set *set)
+{
+	bool has_result;
+
+	spin_lock(&kutf_result_lock);
+	if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT)
+		/* Pretend there are results if waiting for input */
+		has_result = true;
+	else
+		has_result = !list_empty(&set->results);
+	spin_unlock(&kutf_result_lock);
+
+	return has_result;
+}
+
+struct kutf_result *kutf_remove_result(struct kutf_result_set *set)
+{
+	struct kutf_result *result = NULL;
+	int ret;
+
+	do {
+		ret = wait_event_interruptible(set->waitq,
+				kutf_has_result(set));
+
+		if (ret)
+			return ERR_PTR(ret);
+
+		spin_lock(&kutf_result_lock);
+
+		if (!list_empty(&set->results)) {
+			result = list_first_entry(&set->results,
+					struct kutf_result,
+					node);
+			list_del(&result->node);
+		} else if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT) {
+			/* Return a fake result */
+			static struct kutf_result waiting = {
+				.status = KUTF_RESULT_USERDATA_WAIT
+			};
+			result = &waiting;
+		}
+		/* If result == NULL then there was a race with the event
+		 * being removed between the check in kutf_has_result and
+		 * the lock being obtained. In this case we retry
+		 */
+
+		spin_unlock(&kutf_result_lock);
+	} while (result == NULL);
+
+	return result;
+}
+
+void kutf_set_waiting_for_input(struct kutf_result_set *set)
+{
+	spin_lock(&kutf_result_lock);
+	set->flags |= KUTF_RESULT_SET_WAITING_FOR_INPUT;
+	spin_unlock(&kutf_result_lock);
+
+	wake_up(&set->waitq);
+}
+
+void kutf_clear_waiting_for_input(struct kutf_result_set *set)
+{
+	spin_lock(&kutf_result_lock);
+	set->flags &= ~KUTF_RESULT_SET_WAITING_FOR_INPUT;
+	spin_unlock(&kutf_result_lock);
+}
diff --git a/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c b/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c
new file mode 100644
index 0000000000000..1c350bb339fb7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c
@@ -0,0 +1,1205 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Kernel UTF suite, test and fixture management including user to kernel
+ * interaction */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/atomic.h>
+#include <linux/sched.h>
+
+#include <generated/autoconf.h>
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_resultset.h>
+#include <kutf/kutf_utils.h>
+#include <kutf/kutf_helpers.h>
+
+#if defined(CONFIG_DEBUG_FS)
+
+/**
+ * struct kutf_application - Structure which represents kutf application
+ * @name:	The name of this test application.
+ * @dir:	The debugfs directory for this test
+ * @suite_list:	List head to store all the suites which are part of this
+ *              application
+ */
+struct kutf_application {
+	const char         *name;
+	struct dentry      *dir;
+	struct list_head   suite_list;
+};
+
+/**
+ * struct kutf_test_function - Structure which represents kutf test function
+ * @suite:		Back reference to the suite this test function
+ *                      belongs to
+ * @filters:		Filters that apply to this test function
+ * @test_id:		Test ID
+ * @execute:		Function to run for this test
+ * @test_data:		Static data for this test
+ * @node:		List node for test_list
+ * @variant_list:	List head to store all the variants which can run on
+ *                      this function
+ * @dir:		debugfs directory for this test function
+ */
+struct kutf_test_function {
+	struct kutf_suite  *suite;
+	unsigned int       filters;
+	unsigned int       test_id;
+	void (*execute)(struct kutf_context *context);
+	union kutf_callback_data test_data;
+	struct list_head   node;
+	struct list_head   variant_list;
+	struct dentry      *dir;
+};
+
+/**
+ * struct kutf_test_fixture - Structure which holds information on the kutf
+ *                            test fixture
+ * @test_func:		Test function this fixture belongs to
+ * @fixture_index:	Index of this fixture
+ * @node:		List node for variant_list
+ * @dir:		debugfs directory for this test fixture
+ */
+struct kutf_test_fixture {
+	struct kutf_test_function *test_func;
+	unsigned int              fixture_index;
+	struct list_head          node;
+	struct dentry             *dir;
+};
+
+static struct dentry *base_dir;
+static struct workqueue_struct *kutf_workq;
+
+/**
+ * struct kutf_convert_table - Structure which keeps test results
+ * @result_name:	Status of the test result
+ * @result:		Status value for a single test
+ */
+struct kutf_convert_table {
+	char                    result_name[50];
+	enum kutf_result_status result;
+};
+
+struct kutf_convert_table kutf_convert[] = {
+#define ADD_UTF_RESULT(_name) \
+{ \
+	#_name, \
+	_name, \
+},
+ADD_UTF_RESULT(KUTF_RESULT_BENCHMARK)
+ADD_UTF_RESULT(KUTF_RESULT_SKIP)
+ADD_UTF_RESULT(KUTF_RESULT_UNKNOWN)
+ADD_UTF_RESULT(KUTF_RESULT_PASS)
+ADD_UTF_RESULT(KUTF_RESULT_DEBUG)
+ADD_UTF_RESULT(KUTF_RESULT_INFO)
+ADD_UTF_RESULT(KUTF_RESULT_WARN)
+ADD_UTF_RESULT(KUTF_RESULT_FAIL)
+ADD_UTF_RESULT(KUTF_RESULT_FATAL)
+ADD_UTF_RESULT(KUTF_RESULT_ABORT)
+};
+
+#define UTF_CONVERT_SIZE (ARRAY_SIZE(kutf_convert))
+
+/**
+ * kutf_create_context() - Create a test context in which a specific fixture
+ *                         of an application will be run and its results
+ *                         reported back to the user
+ * @test_fix:	Test fixture to be run.
+ *
+ * The context's refcount will be initialized to 1.
+ *
+ * Return: Returns the created test context on success or NULL on failure
+ */
+static struct kutf_context *kutf_create_context(
+		struct kutf_test_fixture *test_fix);
+
+/**
+ * kutf_destroy_context() - Destroy a previously created test context, only
+ *                          once its refcount has become zero
+ * @kref:	pointer to kref member within the context
+ *
+ * This should only be used via a kref_put() call on the context's kref member
+ */
+static void kutf_destroy_context(struct kref *kref);
+
+/**
+ * kutf_context_get() - increment refcount on a context
+ * @context:	the kutf context
+ *
+ * This must be used when the lifetime of the context might exceed that of the
+ * thread creating @context
+ */
+static void kutf_context_get(struct kutf_context *context);
+
+/**
+ * kutf_context_put() - decrement refcount on a context, destroying it when it
+ *                      reached zero
+ * @context:	the kutf context
+ *
+ * This must be used only after a corresponding kutf_context_get() call on
+ * @context, and the caller no longer needs access to @context.
+ */
+static void kutf_context_put(struct kutf_context *context);
+
+/**
+ * kutf_set_result() - Set the test result against the specified test context
+ * @context:	Test context
+ * @status:	Result status
+ */
+static void kutf_set_result(struct kutf_context *context,
+		enum kutf_result_status status);
+
+/**
+ * kutf_set_expected_result() - Set the expected test result for the specified
+ *                              test context
+ * @context:		Test context
+ * @expected_status:	Expected result status
+ */
+static void kutf_set_expected_result(struct kutf_context *context,
+		enum kutf_result_status expected_status);
+
+/**
+ * kutf_result_to_string() - Converts a KUTF result into a string
+ * @result_str:      Output result string
+ * @result:          Result status to convert
+ *
+ * Return: 1 if test result was successfully converted to string, 0 otherwise
+ */
+static int kutf_result_to_string(char **result_str,
+		enum kutf_result_status result)
+{
+	int i;
+	int ret = 0;
+
+	for (i = 0; i < UTF_CONVERT_SIZE; i++) {
+		if (result == kutf_convert[i].result) {
+			*result_str = kutf_convert[i].result_name;
+			ret = 1;
+		}
+	}
+	return ret;
+}
+
+/**
+ * kutf_debugfs_const_string_read() - Simple debugfs read callback which
+ *                                    returns a constant string
+ * @file:	Opened file to read from
+ * @buf:	User buffer to write the data into
+ * @len:	Amount of data to read
+ * @ppos:	Offset into file to read from
+ *
+ * Return: On success, the number of bytes read and offset @ppos advanced by
+ *         this number; on error, negative value
+ */
+static ssize_t kutf_debugfs_const_string_read(struct file *file,
+		char __user *buf, size_t len, loff_t *ppos)
+{
+	char *str = file->private_data;
+
+	return simple_read_from_buffer(buf, len, ppos, str, strlen(str));
+}
+
+static const struct file_operations kutf_debugfs_const_string_ops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = kutf_debugfs_const_string_read,
+	.llseek  = default_llseek,
+};
+
+/**
+ * kutf_add_explicit_result() - Check if an explicit result needs to be added
+ * @context:	KUTF test context
+ */
+static void kutf_add_explicit_result(struct kutf_context *context)
+{
+	switch (context->expected_status) {
+	case KUTF_RESULT_UNKNOWN:
+		if (context->status == KUTF_RESULT_UNKNOWN)
+			kutf_test_pass(context, "(implicit pass)");
+		break;
+
+	case KUTF_RESULT_WARN:
+		if (context->status == KUTF_RESULT_WARN)
+			kutf_test_pass(context,
+					"Pass (expected warn occurred)");
+		else if (context->status != KUTF_RESULT_SKIP)
+			kutf_test_fail(context,
+					"Fail (expected warn missing)");
+		break;
+
+	case KUTF_RESULT_FAIL:
+		if (context->status == KUTF_RESULT_FAIL)
+			kutf_test_pass(context,
+					"Pass (expected fail occurred)");
+		else if (context->status != KUTF_RESULT_SKIP) {
+			/* Force the expected status so the fail gets logged */
+			context->expected_status = KUTF_RESULT_PASS;
+			kutf_test_fail(context,
+					"Fail (expected fail missing)");
+		}
+		break;
+
+	case KUTF_RESULT_FATAL:
+		if (context->status == KUTF_RESULT_FATAL)
+			kutf_test_pass(context,
+					"Pass (expected fatal occurred)");
+		else if (context->status != KUTF_RESULT_SKIP)
+			kutf_test_fail(context,
+					"Fail (expected fatal missing)");
+		break;
+
+	case KUTF_RESULT_ABORT:
+		if (context->status == KUTF_RESULT_ABORT)
+			kutf_test_pass(context,
+					"Pass (expected abort occurred)");
+		else if (context->status != KUTF_RESULT_SKIP)
+			kutf_test_fail(context,
+					"Fail (expected abort missing)");
+		break;
+	default:
+		break;
+	}
+}
+
+static void kutf_run_test(struct work_struct *data)
+{
+	struct kutf_context *test_context = container_of(data,
+			struct kutf_context, work);
+	struct kutf_suite *suite = test_context->suite;
+	struct kutf_test_function *test_func;
+
+	test_func = test_context->test_fix->test_func;
+
+	/*
+	 * Call the create fixture function if required before the
+	 * fixture is run
+	 */
+	if (suite->create_fixture)
+		test_context->fixture = suite->create_fixture(test_context);
+
+	/* Only run the test if the fixture was created (if required) */
+	if ((suite->create_fixture && test_context->fixture) ||
+			(!suite->create_fixture)) {
+		/* Run this fixture */
+		test_func->execute(test_context);
+
+		if (suite->remove_fixture)
+			suite->remove_fixture(test_context);
+
+		kutf_add_explicit_result(test_context);
+	}
+
+	kutf_add_result(test_context, KUTF_RESULT_TEST_FINISHED, NULL);
+
+	kutf_context_put(test_context);
+}
+
+/**
+ * kutf_debugfs_run_open() Debugfs open callback for the "run" entry.
+ * @inode:	inode of the opened file
+ * @file:	Opened file to read from
+ *
+ * This function creates a KUTF context and queues it onto a workqueue to be
+ * run asynchronously. The resulting file descriptor can be used to communicate
+ * userdata to the test and to read back the results of the test execution.
+ *
+ * Return: 0 on success
+ */
+static int kutf_debugfs_run_open(struct inode *inode, struct file *file)
+{
+	struct kutf_test_fixture *test_fix = inode->i_private;
+	struct kutf_context *test_context;
+	int err = 0;
+
+	test_context = kutf_create_context(test_fix);
+	if (!test_context) {
+		err = -ENOMEM;
+		goto finish;
+	}
+
+	file->private_data = test_context;
+
+	/* This reference is release by the kutf_run_test */
+	kutf_context_get(test_context);
+
+	queue_work(kutf_workq, &test_context->work);
+
+finish:
+	return err;
+}
+
+#define USERDATA_WARNING_MESSAGE "WARNING: This test requires userdata\n"
+
+/**
+ * kutf_debugfs_run_read() - Debugfs read callback for the "run" entry.
+ * @file:	Opened file to read from
+ * @buf:	User buffer to write the data into
+ * @len:	Amount of data to read
+ * @ppos:	Offset into file to read from
+ *
+ * This function emits the results of the test, blocking until they are
+ * available.
+ *
+ * If the test involves user data then this will also return user data records
+ * to user space. If the test is waiting for user data then this function will
+ * output a message (to make the likes of 'cat' display it), followed by
+ * returning 0 to mark the end of file.
+ *
+ * Results will be emitted one at a time, once all the results have been read
+ * 0 will be returned to indicate there is no more data.
+ *
+ * Return: Number of bytes read.
+ */
+static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf,
+		size_t len, loff_t *ppos)
+{
+	struct kutf_context *test_context = file->private_data;
+	struct kutf_result *res;
+	unsigned long bytes_not_copied;
+	ssize_t bytes_copied = 0;
+	char *kutf_str_ptr = NULL;
+	size_t kutf_str_len = 0;
+	size_t message_len = 0;
+	char separator = ':';
+	char terminator = '\n';
+
+	res = kutf_remove_result(test_context->result_set);
+
+	if (IS_ERR(res))
+		return PTR_ERR(res);
+
+	/*
+	 * Handle 'fake' results - these results are converted to another
+	 * form before being returned from the kernel
+	 */
+	switch (res->status) {
+	case KUTF_RESULT_TEST_FINISHED:
+		return 0;
+	case KUTF_RESULT_USERDATA_WAIT:
+		if (test_context->userdata.flags &
+				KUTF_USERDATA_WARNING_OUTPUT) {
+			/*
+			 * Warning message already output,
+			 * signal end-of-file
+			 */
+			return 0;
+		}
+
+		message_len = sizeof(USERDATA_WARNING_MESSAGE)-1;
+		if (message_len > len)
+			message_len = len;
+
+		bytes_not_copied = copy_to_user(buf,
+				USERDATA_WARNING_MESSAGE,
+				message_len);
+		if (bytes_not_copied != 0)
+			return -EFAULT;
+		test_context->userdata.flags |= KUTF_USERDATA_WARNING_OUTPUT;
+		return message_len;
+	case KUTF_RESULT_USERDATA:
+		message_len = strlen(res->message);
+		if (message_len > len-1) {
+			message_len = len-1;
+			pr_warn("User data truncated, read not long enough\n");
+		}
+		bytes_not_copied = copy_to_user(buf, res->message,
+				message_len);
+		if (bytes_not_copied != 0) {
+			pr_warn("Failed to copy data to user space buffer\n");
+			return -EFAULT;
+		}
+		/* Finally the terminator */
+		bytes_not_copied = copy_to_user(&buf[message_len],
+				&terminator, 1);
+		if (bytes_not_copied != 0) {
+			pr_warn("Failed to copy data to user space buffer\n");
+			return -EFAULT;
+		}
+		return message_len+1;
+	default:
+		/* Fall through - this is a test result */
+		break;
+	}
+
+	/* Note: This code assumes a result is read completely */
+	kutf_result_to_string(&kutf_str_ptr, res->status);
+	if (kutf_str_ptr)
+		kutf_str_len = strlen(kutf_str_ptr);
+
+	if (res->message)
+		message_len = strlen(res->message);
+
+	if ((kutf_str_len + 1 + message_len + 1) > len) {
+		pr_err("Not enough space in user buffer for a single result");
+		return 0;
+	}
+
+	/* First copy the result string */
+	if (kutf_str_ptr) {
+		bytes_not_copied = copy_to_user(&buf[0], kutf_str_ptr,
+						kutf_str_len);
+		bytes_copied += kutf_str_len - bytes_not_copied;
+		if (bytes_not_copied)
+			goto exit;
+	}
+
+	/* Then the separator */
+	bytes_not_copied = copy_to_user(&buf[bytes_copied],
+					&separator, 1);
+	bytes_copied += 1 - bytes_not_copied;
+	if (bytes_not_copied)
+		goto exit;
+
+	/* Finally Next copy the result string */
+	if (res->message) {
+		bytes_not_copied = copy_to_user(&buf[bytes_copied],
+						res->message, message_len);
+		bytes_copied += message_len - bytes_not_copied;
+		if (bytes_not_copied)
+			goto exit;
+	}
+
+	/* Finally the terminator */
+	bytes_not_copied = copy_to_user(&buf[bytes_copied],
+					&terminator, 1);
+	bytes_copied += 1 - bytes_not_copied;
+
+exit:
+	return bytes_copied;
+}
+
+/**
+ * kutf_debugfs_run_write() Debugfs write callback for the "run" entry.
+ * @file:	Opened file to write to
+ * @buf:	User buffer to read the data from
+ * @len:	Amount of data to write
+ * @ppos:	Offset into file to write to
+ *
+ * This function allows user and kernel to exchange extra data necessary for
+ * the test fixture.
+ *
+ * The data is added to the first struct kutf_context running the fixture
+ *
+ * Return: Number of bytes written
+ */
+static ssize_t kutf_debugfs_run_write(struct file *file,
+		const char __user *buf, size_t len, loff_t *ppos)
+{
+	int ret = 0;
+	struct kutf_context *test_context = file->private_data;
+
+	if (len > KUTF_MAX_LINE_LENGTH)
+		return -EINVAL;
+
+	ret = kutf_helper_input_enqueue(test_context, buf, len);
+	if (ret < 0)
+		return ret;
+
+	return len;
+}
+
+/**
+ * kutf_debugfs_run_release() - Debugfs release callback for the "run" entry.
+ * @inode:	File entry representation
+ * @file:	A specific opening of the file
+ *
+ * Release any resources that were created during the opening of the file
+ *
+ * Note that resources may not be released immediately, that might only happen
+ * later when other users of the kutf_context release their refcount.
+ *
+ * Return: 0 on success
+ */
+static int kutf_debugfs_run_release(struct inode *inode, struct file *file)
+{
+	struct kutf_context *test_context = file->private_data;
+
+	kutf_helper_input_enqueue_end_of_data(test_context);
+
+	kutf_context_put(test_context);
+	return 0;
+}
+
+static const struct file_operations kutf_debugfs_run_ops = {
+	.owner = THIS_MODULE,
+	.open = kutf_debugfs_run_open,
+	.read = kutf_debugfs_run_read,
+	.write = kutf_debugfs_run_write,
+	.release = kutf_debugfs_run_release,
+	.llseek  = default_llseek,
+};
+
+/**
+ * create_fixture_variant() - Creates a fixture variant for the specified
+ *                            test function and index and the debugfs entries
+ *                            that represent it.
+ * @test_func:		Test function
+ * @fixture_index:	Fixture index
+ *
+ * Return: 0 on success, negative value corresponding to error code in failure
+ */
+static int create_fixture_variant(struct kutf_test_function *test_func,
+		unsigned int fixture_index)
+{
+	struct kutf_test_fixture *test_fix;
+	char name[11];	/* Enough to print the MAX_UINT32 + the null terminator */
+	struct dentry *tmp;
+	int err;
+
+	test_fix = kmalloc(sizeof(*test_fix), GFP_KERNEL);
+	if (!test_fix) {
+		pr_err("Failed to create debugfs directory when adding fixture\n");
+		err = -ENOMEM;
+		goto fail_alloc;
+	}
+
+	test_fix->test_func = test_func;
+	test_fix->fixture_index = fixture_index;
+
+	snprintf(name, sizeof(name), "%d", fixture_index);
+	test_fix->dir = debugfs_create_dir(name, test_func->dir);
+	if (!test_func->dir) {
+		pr_err("Failed to create debugfs directory when adding fixture\n");
+		/* Might not be the right error, we don't get it passed back to us */
+		err = -EEXIST;
+		goto fail_dir;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, test_fix->dir, "fixture\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when adding fixture\n");
+		/* Might not be the right error, we don't get it passed back to us */
+		err = -EEXIST;
+		goto fail_file;
+	}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	tmp = debugfs_create_file_unsafe(
+#else
+	tmp = debugfs_create_file(
+#endif
+			"run", 0600, test_fix->dir,
+			test_fix,
+			&kutf_debugfs_run_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"run\" when adding fixture\n");
+		/* Might not be the right error, we don't get it passed back to us */
+		err = -EEXIST;
+		goto fail_file;
+	}
+
+	list_add(&test_fix->node, &test_func->variant_list);
+	return 0;
+
+fail_file:
+	debugfs_remove_recursive(test_fix->dir);
+fail_dir:
+	kfree(test_fix);
+fail_alloc:
+	return err;
+}
+
+/**
+ * kutf_remove_test_variant() - Destroy a previously created fixture variant.
+ * @test_fix:	Test fixture
+ */
+static void kutf_remove_test_variant(struct kutf_test_fixture *test_fix)
+{
+	debugfs_remove_recursive(test_fix->dir);
+	kfree(test_fix);
+}
+
+void kutf_add_test_with_filters_and_data(
+		struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data test_data)
+{
+	struct kutf_test_function *test_func;
+	struct dentry *tmp;
+	unsigned int i;
+
+	test_func = kmalloc(sizeof(*test_func), GFP_KERNEL);
+	if (!test_func) {
+		pr_err("Failed to allocate memory when adding test %s\n", name);
+		goto fail_alloc;
+	}
+
+	INIT_LIST_HEAD(&test_func->variant_list);
+
+	test_func->dir = debugfs_create_dir(name, suite->dir);
+	if (!test_func->dir) {
+		pr_err("Failed to create debugfs directory when adding test %s\n", name);
+		goto fail_dir;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, test_func->dir, "test\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	test_func->filters = filters;
+	tmp = debugfs_create_x32("filters", S_IROTH, test_func->dir,
+				 &test_func->filters);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"filters\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	test_func->test_id = id;
+	tmp = debugfs_create_u32("test_id", S_IROTH, test_func->dir,
+				 &test_func->test_id);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"test_id\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	for (i = 0; i < suite->fixture_variants; i++) {
+		if (create_fixture_variant(test_func, i)) {
+			pr_err("Failed to create fixture %d when adding test %s\n", i, name);
+			goto fail_file;
+		}
+	}
+
+	test_func->suite = suite;
+	test_func->execute = execute;
+	test_func->test_data = test_data;
+
+	list_add(&test_func->node, &suite->test_list);
+	return;
+
+fail_file:
+	debugfs_remove_recursive(test_func->dir);
+fail_dir:
+	kfree(test_func);
+fail_alloc:
+	return;
+}
+EXPORT_SYMBOL(kutf_add_test_with_filters_and_data);
+
+void kutf_add_test_with_filters(
+		struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters)
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+
+	kutf_add_test_with_filters_and_data(suite,
+					    id,
+					    name,
+					    execute,
+					    suite->suite_default_flags,
+					    data);
+}
+EXPORT_SYMBOL(kutf_add_test_with_filters);
+
+void kutf_add_test(struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context))
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+
+	kutf_add_test_with_filters_and_data(suite,
+					    id,
+					    name,
+					    execute,
+					    suite->suite_default_flags,
+					    data);
+}
+EXPORT_SYMBOL(kutf_add_test);
+
+/**
+ * kutf_remove_test(): Remove a previously added test function.
+ * @test_func: Test function
+ */
+static void kutf_remove_test(struct kutf_test_function *test_func)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	list_for_each_safe(pos, tmp, &test_func->variant_list) {
+		struct kutf_test_fixture *test_fix;
+
+		test_fix = list_entry(pos, struct kutf_test_fixture, node);
+		kutf_remove_test_variant(test_fix);
+	}
+
+	list_del(&test_func->node);
+	debugfs_remove_recursive(test_func->dir);
+	kfree(test_func);
+}
+
+struct kutf_suite *kutf_create_suite_with_filters_and_data(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data suite_data)
+{
+	struct kutf_suite *suite;
+	struct dentry *tmp;
+
+	suite = kmalloc(sizeof(*suite), GFP_KERNEL);
+	if (!suite) {
+		pr_err("Failed to allocate memory when creating suite %s\n", name);
+		goto fail_kmalloc;
+	}
+
+	suite->dir = debugfs_create_dir(name, app->dir);
+	if (!suite->dir) {
+		pr_err("Failed to create debugfs directory when adding test %s\n", name);
+		goto fail_debugfs;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, suite->dir, "suite\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	INIT_LIST_HEAD(&suite->test_list);
+	suite->app = app;
+	suite->name = name;
+	suite->fixture_variants = fixture_count;
+	suite->create_fixture = create_fixture;
+	suite->remove_fixture = remove_fixture;
+	suite->suite_default_flags = filters;
+	suite->suite_data = suite_data;
+
+	list_add(&suite->node, &app->suite_list);
+
+	return suite;
+
+fail_file:
+	debugfs_remove_recursive(suite->dir);
+fail_debugfs:
+	kfree(suite);
+fail_kmalloc:
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_create_suite_with_filters_and_data);
+
+struct kutf_suite *kutf_create_suite_with_filters(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters)
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+	return kutf_create_suite_with_filters_and_data(app,
+						       name,
+						       fixture_count,
+						       create_fixture,
+						       remove_fixture,
+						       filters,
+						       data);
+}
+EXPORT_SYMBOL(kutf_create_suite_with_filters);
+
+struct kutf_suite *kutf_create_suite(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context))
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+	return kutf_create_suite_with_filters_and_data(app,
+						       name,
+						       fixture_count,
+						       create_fixture,
+						       remove_fixture,
+						       KUTF_F_TEST_GENERIC,
+						       data);
+}
+EXPORT_SYMBOL(kutf_create_suite);
+
+/**
+ * kutf_destroy_suite() - Destroy a previously added test suite.
+ * @suite:	Test suite
+ */
+static void kutf_destroy_suite(struct kutf_suite *suite)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	list_for_each_safe(pos, tmp, &suite->test_list) {
+		struct kutf_test_function *test_func;
+
+		test_func = list_entry(pos, struct kutf_test_function, node);
+		kutf_remove_test(test_func);
+	}
+
+	list_del(&suite->node);
+	debugfs_remove_recursive(suite->dir);
+	kfree(suite);
+}
+
+struct kutf_application *kutf_create_application(const char *name)
+{
+	struct kutf_application *app;
+	struct dentry *tmp;
+
+	app = kmalloc(sizeof(*app), GFP_KERNEL);
+	if (!app) {
+		pr_err("Failed to create allocate memory when creating application %s\n", name);
+		goto fail_kmalloc;
+	}
+
+	app->dir = debugfs_create_dir(name, base_dir);
+	if (!app->dir) {
+		pr_err("Failed to create debugfs direcotry when creating application %s\n", name);
+		goto fail_debugfs;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, app->dir, "application\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when creating application %s\n", name);
+		goto fail_file;
+	}
+
+	INIT_LIST_HEAD(&app->suite_list);
+	app->name = name;
+
+	return app;
+
+fail_file:
+	debugfs_remove_recursive(app->dir);
+fail_debugfs:
+	kfree(app);
+fail_kmalloc:
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_create_application);
+
+void kutf_destroy_application(struct kutf_application *app)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	list_for_each_safe(pos, tmp, &app->suite_list) {
+		struct kutf_suite *suite;
+
+		suite = list_entry(pos, struct kutf_suite, node);
+		kutf_destroy_suite(suite);
+	}
+
+	debugfs_remove_recursive(app->dir);
+	kfree(app);
+}
+EXPORT_SYMBOL(kutf_destroy_application);
+
+static struct kutf_context *kutf_create_context(
+		struct kutf_test_fixture *test_fix)
+{
+	struct kutf_context *new_context;
+
+	new_context = kmalloc(sizeof(*new_context), GFP_KERNEL);
+	if (!new_context) {
+		pr_err("Failed to allocate test context");
+		goto fail_alloc;
+	}
+
+	new_context->result_set = kutf_create_result_set();
+	if (!new_context->result_set) {
+		pr_err("Failed to create result set");
+		goto fail_result_set;
+	}
+
+	new_context->test_fix = test_fix;
+	/* Save the pointer to the suite as the callbacks will require it */
+	new_context->suite = test_fix->test_func->suite;
+	new_context->status = KUTF_RESULT_UNKNOWN;
+	new_context->expected_status = KUTF_RESULT_UNKNOWN;
+
+	kutf_mempool_init(&new_context->fixture_pool);
+	new_context->fixture = NULL;
+	new_context->fixture_index = test_fix->fixture_index;
+	new_context->fixture_name = NULL;
+	new_context->test_data = test_fix->test_func->test_data;
+
+	new_context->userdata.flags = 0;
+	INIT_LIST_HEAD(&new_context->userdata.input_head);
+	init_waitqueue_head(&new_context->userdata.input_waitq);
+
+	INIT_WORK(&new_context->work, kutf_run_test);
+
+	kref_init(&new_context->kref);
+
+	return new_context;
+
+fail_result_set:
+	kfree(new_context);
+fail_alloc:
+	return NULL;
+}
+
+static void kutf_destroy_context(struct kref *kref)
+{
+	struct kutf_context *context;
+
+	context = container_of(kref, struct kutf_context, kref);
+	kutf_destroy_result_set(context->result_set);
+	kutf_mempool_destroy(&context->fixture_pool);
+	kfree(context);
+}
+
+static void kutf_context_get(struct kutf_context *context)
+{
+	kref_get(&context->kref);
+}
+
+static void kutf_context_put(struct kutf_context *context)
+{
+	kref_put(&context->kref, kutf_destroy_context);
+}
+
+
+static void kutf_set_result(struct kutf_context *context,
+		enum kutf_result_status status)
+{
+	context->status = status;
+}
+
+static void kutf_set_expected_result(struct kutf_context *context,
+		enum kutf_result_status expected_status)
+{
+	context->expected_status = expected_status;
+}
+
+/**
+ * kutf_test_log_result() - Log a result for the specified test context
+ * @context:	Test context
+ * @message:	Result string
+ * @new_status:	Result status
+ */
+static void kutf_test_log_result(
+	struct kutf_context *context,
+	const char *message,
+	enum kutf_result_status new_status)
+{
+	if (context->status < new_status)
+		context->status = new_status;
+
+	if (context->expected_status != new_status)
+		kutf_add_result(context, new_status, message);
+}
+
+void kutf_test_log_result_external(
+	struct kutf_context *context,
+	const char *message,
+	enum kutf_result_status new_status)
+{
+	kutf_test_log_result(context, message, new_status);
+}
+EXPORT_SYMBOL(kutf_test_log_result_external);
+
+void kutf_test_expect_abort(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_ABORT);
+}
+EXPORT_SYMBOL(kutf_test_expect_abort);
+
+void kutf_test_expect_fatal(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_FATAL);
+}
+EXPORT_SYMBOL(kutf_test_expect_fatal);
+
+void kutf_test_expect_fail(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_FAIL);
+}
+EXPORT_SYMBOL(kutf_test_expect_fail);
+
+void kutf_test_expect_warn(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_WARN);
+}
+EXPORT_SYMBOL(kutf_test_expect_warn);
+
+void kutf_test_expect_pass(struct kutf_context *context)
+{
+	kutf_set_expected_result(context, KUTF_RESULT_PASS);
+}
+EXPORT_SYMBOL(kutf_test_expect_pass);
+
+void kutf_test_skip(struct kutf_context *context)
+{
+	kutf_set_result(context, KUTF_RESULT_SKIP);
+	kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN);
+
+	kutf_test_log_result(context, "Test skipped", KUTF_RESULT_SKIP);
+}
+EXPORT_SYMBOL(kutf_test_skip);
+
+void kutf_test_skip_msg(struct kutf_context *context, const char *message)
+{
+	kutf_set_result(context, KUTF_RESULT_SKIP);
+	kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN);
+
+	kutf_test_log_result(context, kutf_dsprintf(&context->fixture_pool,
+			     "Test skipped: %s", message), KUTF_RESULT_SKIP);
+	kutf_test_log_result(context, "!!!Test skipped!!!", KUTF_RESULT_SKIP);
+}
+EXPORT_SYMBOL(kutf_test_skip_msg);
+
+void kutf_test_debug(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_DEBUG);
+}
+EXPORT_SYMBOL(kutf_test_debug);
+
+void kutf_test_pass(struct kutf_context *context, char const *message)
+{
+	static const char explicit_message[] = "(explicit pass)";
+
+	if (!message)
+		message = explicit_message;
+
+	kutf_test_log_result(context, message, KUTF_RESULT_PASS);
+}
+EXPORT_SYMBOL(kutf_test_pass);
+
+void kutf_test_info(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_INFO);
+}
+EXPORT_SYMBOL(kutf_test_info);
+
+void kutf_test_warn(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_WARN);
+}
+EXPORT_SYMBOL(kutf_test_warn);
+
+void kutf_test_fail(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_FAIL);
+}
+EXPORT_SYMBOL(kutf_test_fail);
+
+void kutf_test_fatal(struct kutf_context *context, char const *message)
+{
+	kutf_test_log_result(context, message, KUTF_RESULT_FATAL);
+}
+EXPORT_SYMBOL(kutf_test_fatal);
+
+void kutf_test_abort(struct kutf_context *context)
+{
+	kutf_test_log_result(context, "", KUTF_RESULT_ABORT);
+}
+EXPORT_SYMBOL(kutf_test_abort);
+
+/**
+ * init_kutf_core() - Module entry point.
+ *
+ * Create the base entry point in debugfs.
+ */
+static int __init init_kutf_core(void)
+{
+	kutf_workq = alloc_workqueue("kutf workq", WQ_UNBOUND, 1);
+	if (!kutf_workq)
+		return -ENOMEM;
+
+	base_dir = debugfs_create_dir("kutf_tests", NULL);
+	if (!base_dir) {
+		destroy_workqueue(kutf_workq);
+		kutf_workq = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * exit_kutf_core() - Module exit point.
+ *
+ * Remove the base entry point in debugfs.
+ */
+static void __exit exit_kutf_core(void)
+{
+	debugfs_remove_recursive(base_dir);
+
+	if (kutf_workq)
+		destroy_workqueue(kutf_workq);
+}
+
+#else	/* defined(CONFIG_DEBUG_FS) */
+
+/**
+ * init_kutf_core() - Module entry point.
+ *
+ * Stub for when build against a kernel without debugfs support
+ */
+static int __init init_kutf_core(void)
+{
+	pr_debug("KUTF requires a kernel with debug fs support");
+
+	return -ENODEV;
+}
+
+/**
+ * exit_kutf_core() - Module exit point.
+ *
+ * Stub for when build against a kernel without debugfs support
+ */
+static void __exit exit_kutf_core(void)
+{
+}
+#endif	/* defined(CONFIG_DEBUG_FS) */
+
+MODULE_LICENSE("GPL");
+
+module_init(init_kutf_core);
+module_exit(exit_kutf_core);
diff --git a/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c b/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c
new file mode 100644
index 0000000000000..7f5ac517fdb48
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/* Kernel UTF utility functions */
+
+#include <linux/mutex.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+
+#include <kutf/kutf_utils.h>
+#include <kutf/kutf_mem.h>
+
+static char tmp_buffer[KUTF_MAX_DSPRINTF_LEN];
+
+DEFINE_MUTEX(buffer_lock);
+
+const char *kutf_dsprintf(struct kutf_mempool *pool,
+		const char *fmt, ...)
+{
+	va_list args;
+	int len;
+	int size;
+	void *buffer;
+
+	mutex_lock(&buffer_lock);
+	va_start(args, fmt);
+	len = vsnprintf(tmp_buffer, sizeof(tmp_buffer), fmt, args);
+	va_end(args);
+
+	if (len < 0) {
+		pr_err("kutf_dsprintf: Bad format dsprintf format %s\n", fmt);
+		goto fail_format;
+	}
+
+	if (len >= sizeof(tmp_buffer)) {
+		pr_warn("kutf_dsprintf: Truncated dsprintf message %s\n", fmt);
+		size = sizeof(tmp_buffer);
+	} else {
+		size = len + 1;
+	}
+
+	buffer = kutf_mempool_alloc(pool, size);
+	if (!buffer)
+		goto fail_alloc;
+
+	memcpy(buffer, tmp_buffer, size);
+	mutex_unlock(&buffer_lock);
+
+	return buffer;
+
+fail_alloc:
+fail_format:
+	mutex_unlock(&buffer_lock);
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_dsprintf);
diff --git a/drivers/gpu/arm/midgard/tests/kutf/sconscript b/drivers/gpu/arm/midgard/tests/kutf/sconscript
new file mode 100644
index 0000000000000..98f64468dac97
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/kutf/sconscript
@@ -0,0 +1,27 @@
+#
+# (C) COPYRIGHT 2014-2016, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+Import('kutf_env')
+
+make_args = kutf_env.kernel_get_config_defines(ret_list = True)
+
+mod = kutf_env.BuildKernelModule('$STATIC_LIB_PATH/kutf.ko', Glob('*.c'), make_args = make_args)
+kutf_env.KernelObjTarget('kutf', mod)
diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild
new file mode 100644
index 0000000000000..ca8c51273b4c8
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+ccflags-y += -I$(src)/../include -I$(src)/../../../ -I$(src)/../../ -I$(src)/../../backend/gpu -I$(srctree)/drivers/staging/android
+
+obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test.o
+
+mali_kutf_irq_test-y := mali_kutf_irq_test_main.o
diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig
new file mode 100644
index 0000000000000..4a3863afc9bfb
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig
@@ -0,0 +1,29 @@
+#
+# (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+config MALI_IRQ_LATENCY
+ tristate "Mali GPU IRQ latency measurement"
+ depends on MALI_MIDGARD && MALI_DEBUG && MALI_KUTF
+ default m
+ help
+   This option will build a test module mali_kutf_irq_test that
+   can determine the latency of the Mali GPU IRQ on your system.
+   Choosing M here will generate a single module called mali_kutf_irq_test.
diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile
new file mode 100644
index 0000000000000..9218a40f80695
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile
@@ -0,0 +1,49 @@
+#
+# (C) COPYRIGHT 2015, 2017-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+TEST_CCFLAGS := \
+	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+	-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+	-DMALI_USE_CSF=$(MALI_USE_CSF) \
+	$(SCONS_CFLAGS) \
+	-I$(CURDIR)/../include \
+	-I$(CURDIR)/../../../../../../include \
+	-I$(CURDIR)/../../../ \
+	-I$(CURDIR)/../../ \
+	-I$(CURDIR)/../../backend/gpu \
+	-I$(CURDIR)/ \
+	-I$(srctree)/drivers/staging/android \
+	-I$(srctree)/include/linux
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS="$(TEST_CCFLAGS)" KBUILD_EXTRA_SYMBOLS="$(CURDIR)/../kutf/Module.symvers $(CURDIR)/../../Module.symvers" modules
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp
new file mode 100644
index 0000000000000..a6669afd85ca3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp
@@ -0,0 +1,30 @@
+/*
+ * Copyright:
+ * ----------------------------------------------------------------------------
+ * This confidential and proprietary software may be used only as authorized
+ * by a licensing agreement from ARM Limited.
+ *      (C) COPYRIGHT 2018 ARM Limited, ALL RIGHTS RESERVED
+ * The entire notice above must be reproduced on all authorized copies and
+ * copies may only be made to the extent permitted by a licensing agreement
+ * from ARM Limited.
+ * ----------------------------------------------------------------------------
+ */
+
+bob_kernel_module {
+    name: "mali_kutf_irq_test",
+    defaults: ["kernel_test_module_defaults"],
+    srcs: [
+        "Kbuild",
+        "mali_kutf_irq_test_main.c",
+    ],
+    extra_symbols: [
+        "mali_kbase",
+        "kutf",
+    ],
+    install_group: "IG_tests",
+    enabled: false,
+    base_build_kutf: {
+        enabled: true,
+        kbuild_options: ["CONFIG_MALI_IRQ_LATENCY=m"],
+    },
+}
diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
new file mode 100644
index 0000000000000..4181b7f92db61
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
@@ -0,0 +1,273 @@
+/*
+ *
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+
+#include "mali_kbase.h"
+#include <midgard/backend/gpu/mali_kbase_device_internal.h>
+
+#include <kutf/kutf_suite.h>
+#include <kutf/kutf_utils.h>
+
+/*
+ * This file contains the code which is used for measuring interrupt latency
+ * of the Mali GPU IRQ. In particular, function mali_kutf_irq_latency() is
+ * used with this purpose and it is called within KUTF framework - a kernel
+ * unit test framework. The measured latency provided by this test should
+ * be representative for the latency of the Mali JOB/MMU IRQs as well.
+ */
+
+/* KUTF test application pointer for this test */
+struct kutf_application *irq_app;
+
+/**
+ * struct kutf_irq_fixture data - test fixture used by the test functions.
+ * @kbdev:	kbase device for the GPU.
+ *
+ */
+struct kutf_irq_fixture_data {
+	struct kbase_device *kbdev;
+};
+
+#define SEC_TO_NANO(s)	      ((s)*1000000000LL)
+
+/* ID for the GPU IRQ */
+#define GPU_IRQ_HANDLER 2
+
+#define NR_TEST_IRQS 1000000
+
+/* IRQ for the test to trigger. Currently MULTIPLE_GPU_FAULTS as we would not
+ * expect to see this in normal use (e.g., when Android is running). */
+#define TEST_IRQ MULTIPLE_GPU_FAULTS
+
+#define IRQ_TIMEOUT HZ
+
+/* Kernel API for setting irq throttle hook callback and irq time in us*/
+extern int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+		irq_handler_t custom_handler,
+		int irq_type);
+extern irqreturn_t kbase_gpu_irq_handler(int irq, void *data);
+
+static DECLARE_WAIT_QUEUE_HEAD(wait);
+static bool triggered;
+static u64 irq_time;
+
+static void *kbase_untag(void *ptr)
+{
+	return (void *)(((uintptr_t) ptr) & ~3);
+}
+
+/**
+ * kbase_gpu_irq_custom_handler - Custom IRQ throttle handler
+ * @irq:  IRQ number
+ * @data: Data associated with this IRQ
+ *
+ * Return: state of the IRQ
+ */
+static irqreturn_t kbase_gpu_irq_custom_handler(int irq, void *data)
+{
+	struct kbase_device *kbdev = kbase_untag(data);
+	u32 val;
+
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS));
+	if (val & TEST_IRQ) {
+		struct timespec tval;
+
+		getnstimeofday(&tval);
+		irq_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec);
+
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
+
+		triggered = true;
+		wake_up(&wait);
+
+		return IRQ_HANDLED;
+	}
+
+	/* Trigger main irq handler */
+	return kbase_gpu_irq_handler(irq, data);
+}
+
+/**
+ * mali_kutf_irq_default_create_fixture() - Creates the fixture data required
+ *                                          for all the tests in the irq suite.
+ * @context:             KUTF context.
+ *
+ * Return: Fixture data created on success or NULL on failure
+ */
+static void *mali_kutf_irq_default_create_fixture(
+		struct kutf_context *context)
+{
+	struct kutf_irq_fixture_data *data;
+
+	data = kutf_mempool_alloc(&context->fixture_pool,
+			sizeof(struct kutf_irq_fixture_data));
+
+	if (!data)
+		goto fail;
+
+	/* Acquire the kbase device */
+	data->kbdev = kbase_find_device(-1);
+	if (data->kbdev == NULL) {
+		kutf_test_fail(context, "Failed to find kbase device");
+		goto fail;
+	}
+
+	return data;
+
+fail:
+	return NULL;
+}
+
+/**
+ * mali_kutf_irq_default_remove_fixture() - Destroy fixture data previously
+ *                          created by mali_kutf_irq_default_create_fixture.
+ *
+ * @context:             KUTF context.
+ */
+static void mali_kutf_irq_default_remove_fixture(
+		struct kutf_context *context)
+{
+	struct kutf_irq_fixture_data *data = context->fixture;
+	struct kbase_device *kbdev = data->kbdev;
+
+	kbase_release_device(kbdev);
+}
+
+/**
+ * mali_kutf_irq_latency() - measure GPU IRQ latency
+ * @context:		kutf context within which to perform the test
+ *
+ * The test triggers IRQs manually, and measures the
+ * time between triggering the IRQ and the IRQ handler being executed.
+ *
+ * This is not a traditional test, in that the pass/fail status has little
+ * meaning (other than indicating that the IRQ handler executed at all). Instead
+ * the results are in the latencies provided with the test result. There is no
+ * meaningful pass/fail result that can be obtained here, instead the latencies
+ * are provided for manual analysis only.
+ */
+static void mali_kutf_irq_latency(struct kutf_context *context)
+{
+	struct kutf_irq_fixture_data *data = context->fixture;
+	struct kbase_device *kbdev = data->kbdev;
+	u64 min_time = U64_MAX, max_time = 0, average_time = 0;
+	int i;
+	bool test_failed = false;
+
+	/* Force GPU to be powered */
+	kbase_pm_context_active(kbdev);
+
+	kbase_set_custom_irq_handler(kbdev, kbase_gpu_irq_custom_handler,
+			GPU_IRQ_HANDLER);
+
+	for (i = 0; i < NR_TEST_IRQS; i++) {
+		struct timespec tval;
+		u64 start_time;
+		int ret;
+
+		triggered = false;
+		getnstimeofday(&tval);
+		start_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec);
+
+		/* Trigger fake IRQ */
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+				TEST_IRQ);
+
+		ret = wait_event_timeout(wait, triggered != false, IRQ_TIMEOUT);
+
+		if (ret == 0) {
+			kutf_test_fail(context, "Timed out waiting for IRQ\n");
+			test_failed = true;
+			break;
+		}
+
+		if ((irq_time - start_time) < min_time)
+			min_time = irq_time - start_time;
+		if ((irq_time - start_time) > max_time)
+			max_time = irq_time - start_time;
+		average_time += irq_time - start_time;
+
+		udelay(10);
+	}
+
+	/* Go back to default handler */
+	kbase_set_custom_irq_handler(kbdev, NULL, GPU_IRQ_HANDLER);
+
+	kbase_pm_context_idle(kbdev);
+
+	if (!test_failed) {
+		const char *results;
+
+		do_div(average_time, NR_TEST_IRQS);
+		results = kutf_dsprintf(&context->fixture_pool,
+				"Min latency = %lldns, Max latency = %lldns, Average latency = %lldns\n",
+				min_time, max_time, average_time);
+		kutf_test_pass(context, results);
+	}
+}
+
+/**
+ * Module entry point for this test.
+ */
+int mali_kutf_irq_test_main_init(void)
+{
+	struct kutf_suite *suite;
+
+	irq_app = kutf_create_application("irq");
+
+	if (NULL == irq_app) {
+		pr_warn("Creation of test application failed!\n");
+		return -ENOMEM;
+	}
+
+	suite = kutf_create_suite(irq_app, "irq_default",
+			1, mali_kutf_irq_default_create_fixture,
+			mali_kutf_irq_default_remove_fixture);
+
+	if (NULL == suite) {
+		pr_warn("Creation of test suite failed!\n");
+		kutf_destroy_application(irq_app);
+		return -ENOMEM;
+	}
+
+	kutf_add_test(suite, 0x0, "irq_latency",
+			mali_kutf_irq_latency);
+	return 0;
+}
+
+/**
+ * Module exit point for this test.
+ */
+void mali_kutf_irq_test_main_exit(void)
+{
+	kutf_destroy_application(irq_app);
+}
+
+module_init(mali_kutf_irq_test_main_init);
+module_exit(mali_kutf_irq_test_main_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript
new file mode 100644
index 0000000000000..76e37308b1fc5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript
@@ -0,0 +1,36 @@
+#
+# (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+import os
+Import('env')
+
+src = [Glob('#kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/*.c'), Glob('#kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile')]
+
+if env.GetOption('clean') :
+	env.Execute(Action("make clean", '[CLEAN] mali_kutf_irq_test'))
+	cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, [])
+	env.KernelObjTarget('mali_kutf_irq_test', cmd)
+else:
+	makeAction=Action("cd ${SOURCE.dir} && make MALI_UNIT_TEST=${unit} MALI_CUSTOMER_RELEASE=${release} MALI_USE_CSF=${csf} %s && ( ( [ -f mali_kutf_irq_test.ko ] && cp mali_kutf_irq_test.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/mali_kutf_irq_test.ko)" % env.kernel_get_config_defines(), '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, [makeAction])
+	env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/kutf.ko')
+	env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/mali_kbase.ko')
+	env.KernelObjTarget('mali_kutf_irq_test', cmd)
diff --git a/drivers/gpu/arm/midgard/tests/sconscript b/drivers/gpu/arm/midgard/tests/sconscript
new file mode 100644
index 0000000000000..0bd24a5e3f352
--- /dev/null
+++ b/drivers/gpu/arm/midgard/tests/sconscript
@@ -0,0 +1,44 @@
+#
+# (C) COPYRIGHT 2010-2011, 2013, 2017 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+Import ('env')
+
+kutf_env = env.Clone()
+kutf_env.Append(CPPPATH = '#kernel/drivers/gpu/arm/midgard/tests/include')
+Export('kutf_env')
+
+if Glob('internal/sconscript'):
+	SConscript('internal/sconscript')
+
+if kutf_env['debug'] == '1':
+	SConscript('kutf/sconscript')
+	SConscript('mali_kutf_irq_test/sconscript')
+
+	if Glob('kutf_test/sconscript'):
+		SConscript('kutf_test/sconscript')
+
+	if Glob('kutf_test_runner/sconscript'):
+		SConscript('kutf_test_runner/sconscript')
+
+if env['unit'] == '1':
+	SConscript('mali_kutf_ipa_test/sconscript')
+	SConscript('mali_kutf_ipa_unit_test/sconscript')
+	SConscript('mali_kutf_vinstr_test/sconscript')
diff --git a/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c b/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c
new file mode 100644
index 0000000000000..3aab51a173f03
--- /dev/null
+++ b/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c
@@ -0,0 +1,363 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ *//*
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include "linux/mman.h"
+#include "../mali_kbase.h"
+
+/* mali_kbase_mmap.c
+ *
+ * This file contains Linux specific implementation of
+ * kbase_get_unmapped_area() interface.
+ */
+
+
+/**
+ * align_and_check() - Align the specified pointer to the provided alignment and
+ *                     check that it is still in range.
+ * @gap_end:        Highest possible start address for allocation (end of gap in
+ *                  address space)
+ * @gap_start:      Start address of current memory area / gap in address space
+ * @info:           vm_unmapped_area_info structure passed to caller, containing
+ *                  alignment, length and limits for the allocation
+ * @is_shader_code: True if the allocation is for shader code (which has
+ *                  additional alignment requirements)
+ * @is_same_4gb_page: True if the allocation needs to reside completely within
+ *                    a 4GB chunk
+ *
+ * Return: true if gap_end is now aligned correctly and is still in range,
+ *         false otherwise
+ */
+static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
+		struct vm_unmapped_area_info *info, bool is_shader_code,
+		bool is_same_4gb_page)
+{
+	/* Compute highest gap address at the desired alignment */
+	(*gap_end) -= info->length;
+	(*gap_end) -= (*gap_end - info->align_offset) & info->align_mask;
+
+	if (is_shader_code) {
+		/* Check for 4GB boundary */
+		if (0 == (*gap_end & BASE_MEM_MASK_4GB))
+			(*gap_end) -= (info->align_offset ? info->align_offset :
+					info->length);
+		if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB))
+			(*gap_end) -= (info->align_offset ? info->align_offset :
+					info->length);
+
+		if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end +
+				info->length) & BASE_MEM_MASK_4GB))
+			return false;
+	} else if (is_same_4gb_page) {
+		unsigned long start = *gap_end;
+		unsigned long end = *gap_end + info->length;
+		unsigned long mask = ~((unsigned long)U32_MAX);
+
+		/* Check if 4GB boundary is straddled */
+		if ((start & mask) != ((end - 1) & mask)) {
+			unsigned long offset = end - (end & mask);
+			/* This is to ensure that alignment doesn't get
+			 * disturbed in an attempt to prevent straddling at
+			 * 4GB boundary. The GPU VA is aligned to 2MB when the
+			 * allocation size is > 2MB and there is enough CPU &
+			 * GPU virtual space.
+			 */
+			unsigned long rounded_offset =
+					ALIGN(offset, info->align_mask + 1);
+
+			start -= rounded_offset;
+			end -= rounded_offset;
+
+			*gap_end = start;
+
+			/* The preceding 4GB boundary shall not get straddled,
+			 * even after accounting for the alignment, as the
+			 * size of allocation is limited to 4GB and the initial
+			 * start location was already aligned.
+			 */
+			WARN_ON((start & mask) != ((end - 1) & mask));
+		}
+	}
+
+
+	if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
+		return false;
+
+
+	return true;
+}
+
+/**
+ * kbase_unmapped_area_topdown() - allocates new areas top-down from
+ *                                 below the stack limit.
+ * @info:              Information about the memory area to allocate.
+ * @is_shader_code:    Boolean which denotes whether the allocated area is
+ *                      intended for the use by shader core in which case a
+ *                      special alignment requirements apply.
+ * @is_same_4gb_page: Boolean which indicates whether the allocated area needs
+ *                    to reside completely within a 4GB chunk.
+ *
+ * The unmapped_area_topdown() function in the Linux kernel is not exported
+ * using EXPORT_SYMBOL_GPL macro. To allow us to call this function from a
+ * module and also make use of the fact that some of the requirements for
+ * the unmapped area are known in advance, we implemented an extended version
+ * of this function and prefixed it with 'kbase_'.
+ *
+ * The difference in the call parameter list comes from the fact that
+ * kbase_unmapped_area_topdown() is called with additional parameters which
+ * are provided to indicate whether the allocation is for a shader core memory,
+ * which has additional alignment requirements, and whether the allocation can
+ * straddle a 4GB boundary.
+ *
+ * The modification of the original Linux function lies in how the computation
+ * of the highest gap address at the desired alignment is performed once the
+ * gap with desirable properties is found. For this purpose a special function
+ * is introduced (@ref align_and_check()) which beside computing the gap end
+ * at the desired alignment also performs additional alignment checks for the
+ * case when the memory is executable shader core memory, for which it is
+ * ensured that the gap does not end on a 4GB boundary, and for the case when
+ * memory needs to be confined within a 4GB chunk.
+ *
+ * Return: address of the found gap end (high limit) if area is found;
+ *         -ENOMEM if search is unsuccessful
+*/
+
+static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
+		*info, bool is_shader_code, bool is_same_4gb_page)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+
+	/* Adjust search length to account for worst case alignment overhead */
+	length = info->length + info->align_mask;
+	if (length < info->length)
+		return -ENOMEM;
+
+	/*
+	 * Adjust search limits by the desired length.
+	 * See implementation comment at top of unmapped_area().
+	 */
+	gap_end = info->high_limit;
+	if (gap_end < length)
+		return -ENOMEM;
+	high_limit = gap_end - length;
+
+	if (info->low_limit > high_limit)
+		return -ENOMEM;
+	low_limit = info->low_limit + length;
+
+	/* Check highest gap, which does not precede any rbtree node */
+	gap_start = mm->highest_vm_end;
+	if (gap_start <= high_limit) {
+		if (align_and_check(&gap_end, gap_start, info,
+				is_shader_code, is_same_4gb_page))
+			return gap_end;
+	}
+
+	/* Check if rbtree root looks promising */
+	if (RB_EMPTY_ROOT(&mm->mm_rb))
+		return -ENOMEM;
+	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+	if (vma->rb_subtree_gap < length)
+		return -ENOMEM;
+
+	while (true) {
+		/* Visit right subtree if it looks promising */
+		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+			struct vm_area_struct *right =
+				rb_entry(vma->vm_rb.rb_right,
+					 struct vm_area_struct, vm_rb);
+			if (right->rb_subtree_gap >= length) {
+				vma = right;
+				continue;
+			}
+		}
+
+check_current:
+		/* Check if current node has a suitable gap */
+		gap_end = vma->vm_start;
+		if (gap_end < low_limit)
+			return -ENOMEM;
+		if (gap_start <= high_limit && gap_end - gap_start >= length) {
+			/* We found a suitable gap. Clip it with the original
+			 * high_limit. */
+			if (gap_end > info->high_limit)
+				gap_end = info->high_limit;
+
+			if (align_and_check(&gap_end, gap_start, info,
+					is_shader_code, is_same_4gb_page))
+				return gap_end;
+		}
+
+		/* Visit left subtree if it looks promising */
+		if (vma->vm_rb.rb_left) {
+			struct vm_area_struct *left =
+				rb_entry(vma->vm_rb.rb_left,
+					 struct vm_area_struct, vm_rb);
+			if (left->rb_subtree_gap >= length) {
+				vma = left;
+				continue;
+			}
+		}
+
+		/* Go back up the rbtree to find next candidate node */
+		while (true) {
+			struct rb_node *prev = &vma->vm_rb;
+
+			if (!rb_parent(prev))
+				return -ENOMEM;
+			vma = rb_entry(rb_parent(prev),
+				       struct vm_area_struct, vm_rb);
+			if (prev == vma->vm_rb.rb_right) {
+				gap_start = vma->vm_prev ?
+					vma->vm_prev->vm_end : 0;
+				goto check_current;
+			}
+		}
+	}
+
+	return -ENOMEM;
+}
+
+
+/* This function is based on Linux kernel's arch_get_unmapped_area, but
+ * simplified slightly. Modifications come from the fact that some values
+ * about the memory area are known in advance.
+ */
+unsigned long kbase_get_unmapped_area(struct file *filp,
+		const unsigned long addr, const unsigned long len,
+		const unsigned long pgoff, const unsigned long flags)
+{
+	struct kbase_context *kctx = filp->private_data;
+	struct mm_struct *mm = current->mm;
+	struct vm_unmapped_area_info info;
+	unsigned long align_offset = 0;
+	unsigned long align_mask = 0;
+	unsigned long high_limit = mm->mmap_base;
+	unsigned long low_limit = PAGE_SIZE;
+	int cpu_va_bits = BITS_PER_LONG;
+	int gpu_pc_bits =
+	      kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+	bool is_shader_code = false;
+	bool is_same_4gb_page = false;
+	unsigned long ret;
+
+	/* err on fixed address */
+	if ((flags & MAP_FIXED) || addr)
+		return -EINVAL;
+
+#ifdef CONFIG_64BIT
+	/* too big? */
+	if (len > TASK_SIZE - SZ_2M)
+		return -ENOMEM;
+
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+
+		high_limit = min_t(unsigned long, mm->mmap_base,
+				(kctx->same_va_end << PAGE_SHIFT));
+
+		/* If there's enough (> 33 bits) of GPU VA space, align
+		 * to 2MB boundaries.
+		 */
+		if (kctx->kbdev->gpu_props.mmu.va_bits > 33) {
+			if (len >= SZ_2M) {
+				align_offset = SZ_2M;
+				align_mask = SZ_2M - 1;
+			}
+		}
+
+		low_limit = SZ_2M;
+	} else {
+		cpu_va_bits = 32;
+	}
+#endif /* CONFIG_64BIT */
+	if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) &&
+		(PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) {
+			int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+			struct kbase_va_region *reg =
+					kctx->pending_regions[cookie];
+
+			if (!reg)
+				return -EINVAL;
+
+			if (!(reg->flags & KBASE_REG_GPU_NX)) {
+				if (cpu_va_bits > gpu_pc_bits) {
+					align_offset = 1ULL << gpu_pc_bits;
+					align_mask = align_offset - 1;
+					is_shader_code = true;
+				}
+			} else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
+				unsigned long extent_bytes =
+				     (unsigned long)(reg->extent << PAGE_SHIFT);
+				/* kbase_check_alloc_sizes() already satisfies
+				 * these checks, but they're here to avoid
+				 * maintenance hazards due to the assumptions
+				 * involved */
+				WARN_ON(reg->extent > (ULONG_MAX >> PAGE_SHIFT));
+				WARN_ON(reg->initial_commit > (ULONG_MAX >> PAGE_SHIFT));
+				WARN_ON(!is_power_of_2(extent_bytes));
+				align_mask = extent_bytes - 1;
+				align_offset =
+				      extent_bytes - (reg->initial_commit << PAGE_SHIFT);
+			} else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) {
+				is_same_4gb_page = true;
+			}
+#ifndef CONFIG_64BIT
+	} else {
+		return current->mm->get_unmapped_area(filp, addr, len, pgoff,
+						      flags);
+#endif
+	}
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = low_limit;
+	info.high_limit = high_limit;
+	info.align_offset = align_offset;
+	info.align_mask = align_mask;
+
+	ret = kbase_unmapped_area_topdown(&info, is_shader_code,
+			is_same_4gb_page);
+
+	if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base &&
+			high_limit < (kctx->same_va_end << PAGE_SHIFT)) {
+		/* Retry above mmap_base */
+		info.low_limit = mm->mmap_base;
+		info.high_limit = min_t(u64, TASK_SIZE,
+					(kctx->same_va_end << PAGE_SHIFT));
+
+		ret = kbase_unmapped_area_topdown(&info, is_shader_code,
+				is_same_4gb_page);
+	}
+
+	return ret;
+}
diff --git a/drivers/gpu/arm/sconscript b/drivers/gpu/arm/sconscript
new file mode 100644
index 0000000000000..dd02acd2a20ec
--- /dev/null
+++ b/drivers/gpu/arm/sconscript
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+import glob
+
+
+SConscript('midgard/sconscript')
+
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index fc6c4f81985e6..7ca232b087bdd 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -203,6 +203,7 @@ config DRM_SCHED
 source "drivers/gpu/drm/i2c/Kconfig"
 
 source "drivers/gpu/drm/arm/Kconfig"
+source "drivers/gpu/drm/baikal/Kconfig"
 
 config DRM_RADEON
 	tristate "ATI Radeon"
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 82ff826b33ccd..a9b4690ed2149 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -120,3 +120,4 @@ obj-$(CONFIG_DRM_LIMA)  += lima/
 obj-$(CONFIG_DRM_PANFROST) += panfrost/
 obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/
 obj-$(CONFIG_DRM_MCDE) += mcde/
+obj-$(CONFIG_DRM_BAIKAL_VDU) += baikal/
diff --git a/drivers/gpu/drm/baikal/Kconfig b/drivers/gpu/drm/baikal/Kconfig
new file mode 100644
index 0000000000000..a1bcdbe578680
--- /dev/null
+++ b/drivers/gpu/drm/baikal/Kconfig
@@ -0,0 +1,13 @@
+config DRM_BAIKAL_VDU
+	tristate "DRM Support for Baikal-M VDU"
+	depends on DRM
+	depends on ARM || ARM64 || COMPILE_TEST
+	depends on COMMON_CLK
+	select DRM_KMS_HELPER
+	select DRM_KMS_CMA_HELPER
+	select DRM_GEM_CMA_HELPER
+	select DRM_PANEL
+	select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE
+	help
+	  Choose this option for DRM support for the Baikal-M Video Display Unit (VDU).
+	  If M is selected the module will be called baikal_vdu_drm.
diff --git a/drivers/gpu/drm/baikal/Makefile b/drivers/gpu/drm/baikal/Makefile
new file mode 100644
index 0000000000000..321d3be96b814
--- /dev/null
+++ b/drivers/gpu/drm/baikal/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+baikal_vdu_drm-y +=	baikal_vdu_backlight.o \
+		baikal_vdu_crtc.o \
+		baikal_vdu_drv.o \
+		baikal_vdu_panel.o \
+		baikal_vdu_plane.o
+
+baikal_vdu_drm-$(CONFIG_DEBUG_FS) += baikal_vdu_debugfs.o
+
+obj-$(CONFIG_DRM_BAIKAL_VDU) += baikal_vdu_drm.o
+obj-$(CONFIG_DRM_BAIKAL_HDMI) += baikal-hdmi.o
diff --git a/drivers/gpu/drm/baikal/baikal-hdmi.c b/drivers/gpu/drm/baikal/baikal-hdmi.c
new file mode 100644
index 0000000000000..d82d68e6650fd
--- /dev/null
+++ b/drivers/gpu/drm/baikal/baikal-hdmi.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Baikal Electronics BE-M1000 DesignWare HDMI 2.0 Tx PHY support driver
+ *
+ * Copyright (C) 2019-2021 Baikal Electronics, JSC
+ *
+ * Author: Pavel Parkhomenko <Pavel.Parkhomenko@baikalelectronics.ru>
+ *
+ * Parts of this file were based on sources as follows:
+ *
+ * Copyright (C) 2016 Renesas Electronics Corporation
+ *
+ * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
+ */
+
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <drm/drm_modes.h>
+
+#include <drm/bridge/dw_hdmi.h>
+
+int fixed_clock = 0;
+int max_clock = 0;
+
+static const struct dw_hdmi_mpll_config baikal_hdmi_mpll_cfg[] = {
+	/* pixelclk      opmode  gmp         */
+	{  44900000, { { 0x00b3, 0x0000 }, }, },
+	{  90000000, { { 0x0072, 0x0001 }, }, },
+	{ 182750000, { { 0x0051, 0x0002 }, }, },
+	{ 340000000, { { 0x0040, 0x0003 }, }, },
+	{ 594000000, { { 0x1a40, 0x0003 }, }, },
+	{ ~0UL,      { { 0x0000, 0x0000 }, }, }
+};
+
+static const struct dw_hdmi_curr_ctrl baikal_hdmi_cur_ctr[] = {
+	/* pixelclk    current   */
+	{  44900000, { 0x0000, }, },
+	{  90000000, { 0x0008, }, },
+	{ 182750000, { 0x001b, }, },
+	{ 340000000, { 0x0036, }, },
+	{ 594000000, { 0x003f, }, },
+	{ ~0UL,      { 0x0000, }, }
+};
+
+static const struct dw_hdmi_phy_config baikal_hdmi_phy_cfg[] = {
+	/* pixelclk  symbol  term    vlev */
+	{ 148250000, 0x8009, 0x0004, 0x0232},
+	{ 218250000, 0x8009, 0x0004, 0x0230},
+	{ 288000000, 0x8009, 0x0004, 0x0273},
+	{ 340000000, 0x8029, 0x0004, 0x0273},
+	{ 594000000, 0x8039, 0x0004, 0x014a},
+	{ ~0UL,      0x0000, 0x0000, 0x0000}
+};
+
+static enum drm_mode_status baikal_hdmi_mode_valid(struct drm_connector *con,
+					    const struct drm_display_mode *mode)
+{
+	if (mode->clock < 13500)
+		return MODE_CLOCK_LOW;
+	if (mode->clock >= 340000)
+		return MODE_CLOCK_HIGH;
+	if (fixed_clock && mode->clock != fixed_clock)
+		return MODE_BAD;
+	if (max_clock && mode->clock > max_clock)
+		return MODE_BAD;
+
+	return MODE_OK;
+}
+
+static struct dw_hdmi_plat_data baikal_dw_hdmi_plat_data = {
+	.mpll_cfg   = baikal_hdmi_mpll_cfg,
+	.cur_ctr    = baikal_hdmi_cur_ctr,
+	.phy_config = baikal_hdmi_phy_cfg,
+	.mode_valid = baikal_hdmi_mode_valid,
+};
+
+static int baikal_dw_hdmi_probe(struct platform_device *pdev)
+{
+	struct dw_hdmi *hdmi;
+	hdmi = dw_hdmi_probe(pdev, &baikal_dw_hdmi_plat_data);
+	if (IS_ERR(hdmi)) {
+		return PTR_ERR(hdmi);
+	} else {
+		return 0;
+	}
+}
+
+static int baikal_dw_hdmi_remove(struct platform_device *pdev)
+{
+	struct dw_hdmi *hdmi = platform_get_drvdata(pdev);
+	dw_hdmi_remove(hdmi);
+	return 0;
+}
+
+static const struct of_device_id baikal_dw_hdmi_of_table[] = {
+	{ .compatible = "baikal,hdmi" },
+	{ /* Sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, baikal_dw_hdmi_of_table);
+
+static struct platform_driver baikal_dw_hdmi_platform_driver = {
+	.probe		= baikal_dw_hdmi_probe,
+	.remove		= baikal_dw_hdmi_remove,
+	.driver		= {
+		.name	= "baikal-dw-hdmi",
+		.of_match_table = baikal_dw_hdmi_of_table,
+	},
+};
+
+module_param(fixed_clock, int, 0644);
+module_param(max_clock, int, 0644);
+
+module_platform_driver(baikal_dw_hdmi_platform_driver);
+
+MODULE_AUTHOR("Pavel Parkhomenko <Pavel.Parkhomenko@baikalelectronics.ru>");
+MODULE_DESCRIPTION("Baikal BE-M1000 SoC DesignWare HDMI 2.0 Tx + Gen2 PHY Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/baikal/baikal_vdu_backlight.c b/drivers/gpu/drm/baikal/baikal_vdu_backlight.c
new file mode 100644
index 0000000000000..70059e0795586
--- /dev/null
+++ b/drivers/gpu/drm/baikal/baikal_vdu_backlight.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019-2023 Baikal Electronics, JSC
+ *
+ * Author: Pavel Parkhomenko <Pavel.Parkhomenko@baikalelectronics.ru>
+ *
+ */
+
+/**
+ * baikal_vdu_backlight.c
+ * Implementation of backlight functions for
+ * Baikal Electronics BE-M1000 SoC's VDU
+ */
+
+#include <linux/clk.h>
+#include <linux/input.h>
+#include <linux/module.h>
+
+#include <drm/drm_atomic_helper.h>
+
+#include "baikal_vdu_drm.h"
+#include "baikal_vdu_regs.h"
+
+#define BAIKAL_VDU_MIN_BRIGHTNESS	0
+#define BAIKAL_VDU_DEFAULT_BRIGHTNESS	50
+#define BAIKAL_VDU_BRIGHTNESS_STEP	5
+#define BAIKAL_VDU_DEFAULT_PWM_FREQ	10000
+
+static int baikal_vdu_backlight_update_status(struct backlight_device *bl_dev)
+{
+	struct baikal_vdu_private *priv = bl_get_data(bl_dev);
+	int brightness_on = 1;
+	int brightness = bl_dev->props.brightness;
+	u8 pwmdc;
+
+	if (bl_dev->props.power != FB_BLANK_UNBLANK ||
+	    bl_dev->props.fb_blank != FB_BLANK_UNBLANK ||
+	    bl_dev->props.state & (BL_CORE_SUSPENDED | BL_CORE_FBBLANK)) {
+		brightness_on = 0;
+		brightness = priv->min_brightness;
+	}
+
+	if (priv->enable_gpio)
+		gpiod_set_value_cansleep(priv->enable_gpio, brightness_on);
+
+	pwmdc = brightness ? ((brightness << 6) / 25 - 1) : 0;
+
+	writel(pwmdc, priv->regs + PWMDCR);
+
+	return 0;
+}
+
+static const struct backlight_ops baikal_vdu_backlight_ops = {
+	.options        = BL_CORE_SUSPENDRESUME,
+	.update_status	= baikal_vdu_backlight_update_status,
+};
+
+static void baikal_vdu_input_event(struct input_handle *handle,
+				   unsigned int type, unsigned int code,
+				   int value)
+{
+	struct baikal_vdu_private *priv = handle->private;
+	int brightness;
+
+	if (type != EV_KEY || value == 0)
+		return;
+
+	switch (code) {
+	case KEY_BRIGHTNESSDOWN:
+		brightness = priv->bl_dev->props.brightness -
+			     priv->brightness_step;
+		if (brightness >= priv->min_brightness)
+			backlight_device_set_brightness(priv->bl_dev,
+							brightness);
+		break;
+
+	case KEY_BRIGHTNESSUP:
+		brightness = priv->bl_dev->props.brightness +
+			     priv->brightness_step;
+		backlight_device_set_brightness(priv->bl_dev, brightness);
+		break;
+
+	case KEY_BRIGHTNESS_TOGGLE:
+		priv->brightness_on = !priv->brightness_on;
+		if (priv->brightness_on)
+			backlight_enable(priv->bl_dev);
+		else
+			backlight_disable(priv->bl_dev);
+		break;
+
+	default:
+		return;
+	}
+
+	backlight_force_update(priv->bl_dev, BACKLIGHT_UPDATE_HOTKEY);
+}
+
+static int baikal_vdu_input_connect(struct input_handler *handler,
+				    struct input_dev *dev,
+				    const struct input_device_id *id)
+{
+	struct input_handle *handle;
+	int ret;
+
+	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+	if (!handle)
+		return -ENOMEM;
+
+	handle->private = handler->private;
+	handle->name = KBUILD_MODNAME;
+	handle->dev = dev;
+	handle->handler = handler;
+
+	ret = input_register_handle(handle);
+	if (ret)
+		goto err_free_handle;
+
+	ret = input_open_device(handle);
+	if (ret)
+		goto err_unregister_handle;
+
+	return 0;
+
+err_unregister_handle:
+	input_unregister_handle(handle);
+err_free_handle:
+	kfree(handle);
+	return ret;
+}
+
+static void baikal_vdu_input_disconnect(struct input_handle *handle)
+{
+	input_close_device(handle);
+	input_unregister_handle(handle);
+	kfree(handle);
+}
+
+static const struct input_device_id baikal_vdu_input_ids[] = {
+	{
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT,
+		.evbit = { BIT_MASK(EV_KEY) },
+	},
+
+	{ },    /* Terminating entry */
+};
+
+MODULE_DEVICE_TABLE(input, baikal_vdu_input_ids);
+
+int baikal_vdu_backlight_create(struct drm_device *drm)
+{
+	struct baikal_vdu_crossbar *crossbar = drm->dev_private;
+	struct baikal_vdu_private *priv = &crossbar->lvds;
+	struct device *dev = drm->dev;
+	struct backlight_properties props;
+	struct input_handler *handler;
+	struct device_node *node;
+	u32 min_brightness = BAIKAL_VDU_MIN_BRIGHTNESS;
+	u32 dfl_brightness = BAIKAL_VDU_DEFAULT_BRIGHTNESS;
+	u32 brightness_step = BAIKAL_VDU_BRIGHTNESS_STEP;
+	u32 pwm_frequency = 0;
+	int ret = 0;
+	unsigned long rate;
+	unsigned int pwmfr = 0;
+
+	priv->enable_gpio = devm_gpiod_get_optional(dev, "enable", GPIOD_ASIS);
+	if (IS_ERR(priv->enable_gpio)) {
+		dev_warn(dev, "failed to get ENABLE GPIO\n");
+		priv->enable_gpio = NULL;
+	}
+
+	if (priv->enable_gpio && gpiod_get_direction(priv->enable_gpio) != 0)
+		gpiod_direction_output(priv->enable_gpio, 1);
+
+	node = of_get_child_by_name(dev->of_node, "backlight");
+	if (!node)
+		return 0;
+
+	of_property_read_u32(node, "min-brightness-level", &min_brightness);
+	of_property_read_u32(node, "default-brightness-level", &dfl_brightness);
+	of_property_read_u32(node, "brightness-level-step", &brightness_step);
+	of_property_read_u32(node, "pwm-frequency", &pwm_frequency);
+
+	if (pwm_frequency == 0) {
+		dev_warn(dev, "using default PWM frequency %u\n",
+			 BAIKAL_VDU_DEFAULT_PWM_FREQ);
+		pwm_frequency = BAIKAL_VDU_DEFAULT_PWM_FREQ;
+	}
+
+	memset(&props, 0, sizeof(props));
+	props.max_brightness = 100;
+	props.type = BACKLIGHT_RAW;
+	props.scale = BACKLIGHT_SCALE_LINEAR;
+
+	if (min_brightness > props.max_brightness) {
+		dev_warn(dev, "invalid min brightness level: %u, using %u\n",
+			 min_brightness, props.max_brightness);
+		min_brightness = props.max_brightness;
+	}
+
+	if (dfl_brightness > props.max_brightness ||
+	    dfl_brightness < min_brightness) {
+		dev_warn(dev,
+			 "invalid default brightness level: %u, using %u\n",
+			 dfl_brightness, props.max_brightness);
+		dfl_brightness = props.max_brightness;
+	}
+
+	priv->min_brightness = min_brightness;
+	priv->brightness_step = brightness_step;
+	priv->brightness_on = true;
+
+	props.brightness = dfl_brightness;
+	props.power = FB_BLANK_UNBLANK;
+
+	priv->bl_dev =
+		devm_backlight_device_register(dev, dev_name(dev), dev, priv,
+					       &baikal_vdu_backlight_ops,
+					       &props);
+	if (IS_ERR(priv->bl_dev)) {
+		dev_err(dev, "failed to register backlight device\n");
+		ret = PTR_ERR(priv->bl_dev);
+		priv->bl_dev = NULL;
+		goto out;
+	}
+
+	handler = devm_kzalloc(dev, sizeof(*handler), GFP_KERNEL);
+	if (!handler) {
+		dev_err(dev, "failed to allocate input handler\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	handler->private = priv;
+	handler->event = baikal_vdu_input_event;
+	handler->connect = baikal_vdu_input_connect;
+	handler->disconnect = baikal_vdu_input_disconnect;
+	handler->name = KBUILD_MODNAME;
+	handler->id_table = baikal_vdu_input_ids;
+
+	ret = input_register_handler(handler);
+	if (ret) {
+		dev_err(dev, "failed to register input handler\n");
+		goto out;
+	}
+
+	/* Hold PWM Clock Domain Reset, disable clocking */
+	writel(0, priv->regs + PWMFR);
+
+	rate = clk_get_rate(priv->clk);
+	pwmfr |= PWMFR_PWMFCD(rate / pwm_frequency - 1) | PWMFR_PWMFCI;
+	writel(pwmfr, priv->regs + PWMFR);
+
+	/* Release PWM Clock Domain Reset, enable clocking */
+	writel(pwmfr | PWMFR_PWMPCR | PWMFR_PWMFCE, priv->regs + PWMFR);
+
+	backlight_update_status(priv->bl_dev);
+out:
+	of_node_put(node);
+	return ret;
+}
diff --git a/drivers/gpu/drm/baikal/baikal_vdu_crtc.c b/drivers/gpu/drm/baikal/baikal_vdu_crtc.c
new file mode 100644
index 0000000000000..1042feed14454
--- /dev/null
+++ b/drivers/gpu/drm/baikal/baikal_vdu_crtc.c
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019-2023 Baikal Electronics, JSC
+ *
+ * Author: Pavel Parkhomenko <Pavel.Parkhomenko@baikalelectronics.ru>
+ *
+ */
+
+/**
+ * baikal_vdu_crtc.c
+ * Implementation of the CRTC functions for Baikal Electronics BE-M1000 VDU driver
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/dma-buf.h>
+#include <linux/shmem_fs.h>
+#include <linux/version.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_vblank.h>
+
+#include "baikal_vdu_drm.h"
+#include "baikal_vdu_regs.h"
+
+irqreturn_t baikal_vdu_irq(int irq, void *data)
+{
+	struct baikal_vdu_private *priv = data;
+	irqreturn_t status = IRQ_NONE;
+	u32 raw_stat;
+	u32 irq_stat;
+
+	priv->counters[0]++;
+	irq_stat = readl(priv->regs + IVR);
+	raw_stat = readl(priv->regs + ISR);
+
+	if (raw_stat & INTR_UFU) {
+		priv->counters[4]++;
+		status = IRQ_HANDLED;
+	}
+
+	if (raw_stat & INTR_IFO) {
+		priv->counters[5]++;
+		status = IRQ_HANDLED;
+	}
+
+	if (raw_stat & INTR_OFU) {
+		priv->counters[6]++;
+		status = IRQ_HANDLED;
+	}
+
+	if (irq_stat & INTR_FER) {
+		priv->counters[11]++;
+		priv->counters[12] = readl(priv->regs + DBAR);
+		priv->counters[13] = readl(priv->regs + DCAR);
+		priv->counters[14] = readl(priv->regs + MRR);
+		status = IRQ_HANDLED;
+	}
+
+	priv->counters[3] |= raw_stat;
+
+	/* Clear all interrupts */
+	writel(raw_stat, priv->regs + ISR);
+
+	return status;
+}
+
+static void baikal_vdu_crtc_helper_mode_set_nofb(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct baikal_vdu_private *priv = crtc_to_baikal_vdu(crtc);
+	const struct drm_display_mode *mode = &crtc->state->mode;
+	unsigned long rate;
+	unsigned int ppl, hsw, hfp, hbp;
+	unsigned int lpp, vsw, vfp, vbp;
+	unsigned int reg;
+	int ret = 0;
+
+	drm_mode_debug_printmodeline(mode);
+
+	rate = mode->clock * 1000;
+
+	if (rate != clk_get_rate(priv->clk)) {
+		DRM_DEV_DEBUG_DRIVER(dev->dev, "Requested pixel clock is %lu Hz\n", rate);
+
+		/* hold clock domain reset; disable clocking */
+		writel(0, priv->regs + PCTR);
+
+		if (__clk_is_enabled(priv->clk))
+			clk_disable_unprepare(priv->clk);
+		ret = clk_set_rate(priv->clk, rate);
+
+		if (ret >= 0) {
+			clk_prepare_enable(priv->clk);
+			if (!__clk_is_enabled(priv->clk))
+				ret = -1;
+		}
+
+		/* release clock domain reset; enable clocking */
+		reg = readl(priv->regs + PCTR);
+		reg |= PCTR_PCR + PCTR_PCI;
+		writel(reg, priv->regs + PCTR);
+	}
+
+	if (ret < 0)
+		DRM_ERROR("Cannot set desired pixel clock (%lu Hz)\n", rate);
+
+	ppl = mode->hdisplay / 16;
+	if (priv->index == CRTC_LVDS && priv-> num_lanes == 2) {
+		hsw = mode->hsync_end - mode->hsync_start;
+		hfp = mode->hsync_start - mode->hdisplay - 1;
+	} else {
+		hsw = mode->hsync_end - mode->hsync_start - 1;
+		hfp = mode->hsync_start - mode->hdisplay;
+	}
+	hbp = mode->htotal - mode->hsync_end;
+
+	lpp = mode->vdisplay;
+	vsw = mode->vsync_end - mode->vsync_start;
+	vfp = mode->vsync_start - mode->vdisplay;
+	vbp = mode->vtotal - mode->vsync_end;
+
+	writel((HTR_HFP(hfp) & HTR_HFP_MASK) |
+			(HTR_PPL(ppl) & HTR_PPL_MASK) |
+			(HTR_HBP(hbp) & HTR_HBP_MASK) |
+			(HTR_HSW(hsw) & HTR_HSW_MASK),
+			priv->regs + HTR);
+
+	if (mode->hdisplay > 4080 || ppl * 16 != mode->hdisplay)
+		writel((HPPLOR_HPPLO(mode->hdisplay) & HPPLOR_HPPLO_MASK) | HPPLOR_HPOE,
+				priv->regs + HPPLOR);
+
+	writel((VTR1_VSW(vsw) & VTR1_VSW_MASK) |
+			(VTR1_VFP(vfp) & VTR1_VFP_MASK) |
+			(VTR1_VBP(vbp) & VTR1_VBP_MASK),
+			priv->regs + VTR1);
+
+	writel(lpp & VTR2_LPP_MASK, priv->regs + VTR2);
+
+	writel((HVTER_VSWE(vsw >> VTR1_VSW_LSB_WIDTH) & HVTER_VSWE_MASK) |
+			(HVTER_HSWE(hsw >> HTR_HSW_LSB_WIDTH) & HVTER_HSWE_MASK) |
+			(HVTER_VBPE(vbp >> VTR1_VBP_LSB_WIDTH) & HVTER_VBPE_MASK) |
+			(HVTER_VFPE(vfp >> VTR1_VFP_LSB_WIDTH) & HVTER_VFPE_MASK) |
+			(HVTER_HBPE(hbp >> HTR_HBP_LSB_WIDTH) & HVTER_HBPE_MASK) |
+			(HVTER_HFPE(hfp >> HTR_HFP_LSB_WIDTH) & HVTER_HFPE_MASK),
+			priv->regs + HVTER);
+
+	/* Set polarities */
+	reg = readl(priv->regs + CR1);
+	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+		reg |= CR1_HSP;
+	else
+		reg &= ~CR1_HSP;
+	reg &= ~CR1_VSP; // always set VSP to active high
+	reg |= CR1_DEP; // set DE to active high;
+	writel(reg, priv->regs + CR1);
+}
+
+static enum drm_mode_status baikal_vdu_mode_valid(struct drm_crtc *crtc,
+	                const struct drm_display_mode *mode)
+{
+	struct baikal_vdu_private *priv = crtc_to_baikal_vdu(crtc);
+	if (!priv->mode_override && (mode->hdisplay > 2560 ||
+			mode->vdisplay > 1440))
+		return MODE_BAD;
+	else
+		return MODE_OK;
+}
+
+static void baikal_vdu_crtc_helper_enable(struct drm_crtc *crtc,
+					  struct drm_crtc_state *old_state)
+{
+	struct baikal_vdu_private *priv = crtc_to_baikal_vdu(crtc);
+	const char *data_mapping = NULL;
+	u32 cntl, gpio;
+
+	DRM_DEV_DEBUG_DRIVER(crtc->dev->dev, "enabling pixel clock\n");
+	clk_prepare_enable(priv->clk);
+
+	/* Set 16-word input FIFO watermark */
+	/* Enable and Power Up */
+	cntl = readl(priv->regs + CR1);
+	cntl &= ~(CR1_FDW_MASK | CR1_OPS_MASK);
+	cntl |= CR1_LCE | CR1_FDW_16_WORDS;
+
+	if (priv->index == CRTC_LVDS) {
+		of_property_read_string(priv->bridge->of_node, "data-mapping", &data_mapping);
+		if (!data_mapping) {
+			cntl |= CR1_OPS_LCD18;
+		} else if (!strncmp(data_mapping, "vesa-24", 7))
+			cntl |= CR1_OPS_LCD24;
+		else if (!strncmp(data_mapping, "jeida-18", 8))
+			cntl |= CR1_OPS_LCD18;
+		else {
+			dev_warn(crtc->dev->dev, "%s data mapping is not supported, vesa-24 is set\n", data_mapping);
+			cntl |= CR1_OPS_LCD24;
+		}
+		gpio = GPIOR_UHD_ENB;
+		if (priv->num_lanes == 4)
+			gpio |= GPIOR_UHD_QUAD_PORT;
+		else if (priv->num_lanes == 2)
+			gpio |= GPIOR_UHD_DUAL_PORT;
+		else
+			gpio |= GPIOR_UHD_SNGL_PORT;
+		writel(gpio, priv->regs + GPIOR);
+	} else
+		cntl |= CR1_OPS_LCD24;
+	writel(cntl, priv->regs + CR1);
+
+	writel(0x3ffff, priv->regs + ISR);
+	writel(INTR_FER, priv->regs + IMR);
+}
+
+void baikal_vdu_crtc_helper_disable(struct drm_crtc *crtc)
+{
+	struct baikal_vdu_private *priv = crtc_to_baikal_vdu(crtc);
+
+	writel(0x3ffff, priv->regs + ISR);
+	writel(0, priv->regs + IMR);
+
+	/* Disable clock */
+	DRM_DEV_DEBUG_DRIVER(crtc->dev->dev, "disabling pixel clock\n");
+	clk_disable_unprepare(priv->clk);
+}
+
+static void baikal_vdu_crtc_helper_atomic_flush(struct drm_crtc *crtc,
+					   struct drm_crtc_state *old_state)
+{
+	struct drm_pending_vblank_event *event = crtc->state->event;
+
+	if (event) {
+		crtc->state->event = NULL;
+
+		spin_lock_irq(&crtc->dev->event_lock);
+		if (crtc->state->active && drm_crtc_vblank_get(crtc) == 0)
+			drm_crtc_arm_vblank_event(crtc, event);
+		else
+			drm_crtc_send_vblank_event(crtc, event);
+		spin_unlock_irq(&crtc->dev->event_lock);
+	}
+}
+
+const struct drm_crtc_funcs crtc_funcs = {
+	.set_config = drm_atomic_helper_set_config,
+	.page_flip = drm_atomic_helper_page_flip,
+	.reset = drm_atomic_helper_crtc_reset,
+	.destroy = drm_crtc_cleanup,
+	.atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+};
+
+const struct drm_crtc_helper_funcs crtc_helper_funcs = {
+	.mode_set_nofb = baikal_vdu_crtc_helper_mode_set_nofb,
+	.mode_valid = baikal_vdu_mode_valid,
+	.atomic_flush = baikal_vdu_crtc_helper_atomic_flush,
+	.disable = baikal_vdu_crtc_helper_disable,
+	.atomic_enable = baikal_vdu_crtc_helper_enable,
+
+};
+
+int baikal_vdu_crtc_create(struct baikal_vdu_private *priv)
+{
+	struct drm_device *dev = priv->drm;
+	struct drm_crtc *crtc = &priv->crtc;
+
+	drm_crtc_init_with_planes(dev, crtc,
+				  &priv->primary, NULL,
+				  &crtc_funcs, NULL);
+	drm_crtc_helper_add(crtc, &crtc_helper_funcs);
+
+	DRM_DEV_DEBUG_DRIVER(crtc->dev->dev, "enabling pixel clock\n");
+	clk_prepare_enable(priv->clk);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/baikal/baikal_vdu_debugfs.c b/drivers/gpu/drm/baikal/baikal_vdu_debugfs.c
new file mode 100644
index 0000000000000..8b41a3e87828a
--- /dev/null
+++ b/drivers/gpu/drm/baikal/baikal_vdu_debugfs.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019-2023 Baikal Electronics, JSC
+ *
+ * Author: Pavel Parkhomenko <Pavel.Parkhomenko@baikalelectronics.ru>
+ *
+ */
+
+#include <linux/seq_file.h>
+#include <linux/device.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+
+#include "baikal_vdu_drm.h"
+#include "baikal_vdu_regs.h"
+
+#define REGDEF(reg) { reg, #reg }
+static const struct {
+	u32 reg;
+	const char *name;
+} baikal_vdu_reg_defs[] = {
+	REGDEF(CR1),
+	REGDEF(HTR),
+	REGDEF(VTR1),
+	REGDEF(VTR2),
+	REGDEF(PCTR),
+	REGDEF(ISR),
+	REGDEF(IMR),
+	REGDEF(IVR),
+	REGDEF(ISCR),
+	REGDEF(DBAR),
+	REGDEF(DCAR),
+	REGDEF(DEAR),
+	REGDEF(PWMFR),
+	REGDEF(PWMDCR),
+	REGDEF(HVTER),
+	REGDEF(HPPLOR),
+	REGDEF(GPIOR),
+	REGDEF(MRR),
+};
+
+#define REGS_HDMI	"regs_hdmi"
+#define REGS_LVDS	"regs_lvds"
+
+int baikal_vdu_debugfs_regs(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct baikal_vdu_crossbar *crossbar = dev->dev_private;
+	char *filename = m->file->f_path.dentry->d_iname;
+	struct baikal_vdu_private *priv = NULL;
+	int i;
+
+	if (!strcmp(REGS_HDMI, filename))
+		priv = &crossbar->hdmi;
+	if (!strcmp(REGS_LVDS, filename))
+		priv = &crossbar->lvds;
+
+	if (!priv || !priv->regs)
+		return 0;
+
+	for (i = 0; i < ARRAY_SIZE(baikal_vdu_reg_defs); i++) {
+		seq_printf(m, "%s (0x%04x): 0x%08x\n",
+			   baikal_vdu_reg_defs[i].name, baikal_vdu_reg_defs[i].reg,
+			   readl(priv->regs + baikal_vdu_reg_defs[i].reg));
+	}
+	for (i = 0; i < ARRAY_SIZE(priv->counters); i++) {
+		seq_printf(m, "COUNTER[%d]: 0x%08x\n", i, priv->counters[i]);
+	}
+
+	return 0;
+}
+
+static const struct drm_info_list baikal_vdu_hdmi_debugfs_list[] = {
+	{REGS_HDMI, baikal_vdu_debugfs_regs, 0},
+};
+
+static const struct drm_info_list baikal_vdu_lvds_debugfs_list[] = {
+	{REGS_LVDS, baikal_vdu_debugfs_regs, 0},
+};
+
+int baikal_vdu_hdmi_debugfs_init(struct drm_minor *minor)
+{
+	return drm_debugfs_create_files(baikal_vdu_hdmi_debugfs_list,
+					ARRAY_SIZE(baikal_vdu_hdmi_debugfs_list),
+					minor->debugfs_root, minor);
+}
+
+int baikal_vdu_lvds_debugfs_init(struct drm_minor *minor)
+{
+	return drm_debugfs_create_files(baikal_vdu_lvds_debugfs_list,
+					ARRAY_SIZE(baikal_vdu_lvds_debugfs_list),
+					minor->debugfs_root, minor);
+}
diff --git a/drivers/gpu/drm/baikal/baikal_vdu_drm.h b/drivers/gpu/drm/baikal/baikal_vdu_drm.h
new file mode 100644
index 0000000000000..d8b808a1b4838
--- /dev/null
+++ b/drivers/gpu/drm/baikal/baikal_vdu_drm.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019-2023 Baikal Electronics, JSC
+ *
+ * Author: Pavel Parkhomenko <Pavel.Parkhomenko@baikalelectronics.ru>
+ *
+ */
+
+#ifndef __BAIKAL_VDU_DRM_H__
+#define __BAIKAL_VDU_DRM_H__
+
+#include <drm/drm_gem.h>
+#include <drm/drm_simple_kms_helper.h>
+#include <linux/workqueue.h>
+#include <linux/gpio.h>
+#include <linux/backlight.h>
+
+#define CRTC_HDMI	0
+#define CRTC_LVDS	1
+
+#define crtc_to_baikal_vdu(x) \
+	container_of(x, struct baikal_vdu_private, crtc)
+#define bridge_to_baikal_lvds_bridge(x) \
+	container_of(x, struct baikal_lvds_bridge, bridge)
+#define connector_to_baikal_lvds_bridge(x) \
+	container_of(connector, struct baikal_lvds_bridge, connector)
+
+#define VDU_NAME_LEN 5
+
+struct baikal_vdu_private {
+	struct drm_device *drm;
+	struct drm_crtc crtc;
+	struct drm_encoder encoder;
+	struct drm_bridge *bridge;
+	struct drm_plane primary;
+	void *regs;
+	int irq;
+	struct clk *clk;
+	spinlock_t lock;
+	u32 counters[20];
+	int mode_override;
+	int index;
+	char name[VDU_NAME_LEN];
+	char irq_name[VDU_NAME_LEN + 4];
+	char pclk_name[VDU_NAME_LEN + 5];
+	char regs_name[VDU_NAME_LEN + 5];
+	int num_lanes;
+	int data_mapping;
+	int off;
+	int ready;
+
+	/* backlight */
+	struct gpio_desc *enable_gpio;
+	struct backlight_device *bl_dev;
+	int min_brightness;
+	int brightness_step;
+	bool brightness_on;
+};
+
+struct baikal_vdu_crossbar {
+	struct drm_device *drm;
+	struct baikal_vdu_private hdmi;
+	struct baikal_vdu_private lvds;
+};
+
+struct baikal_lvds_bridge {
+	struct baikal_vdu_private *vdu;
+	struct drm_bridge bridge;
+	struct drm_connector connector;
+	struct drm_panel *panel;
+	u32 connector_type;
+};
+
+/* Generic functions */
+inline void baikal_vdu_switch_on(struct baikal_vdu_private *priv);
+
+inline void baikal_vdu_switch_off(struct baikal_vdu_private *priv);
+
+/* Bridge functions */
+bool bridge_is_baikal_lvds_bridge(const struct drm_bridge *bridge);
+
+struct drm_bridge *devm_baikal_lvds_bridge_add(struct device *dev,
+					     struct drm_panel *panel,
+					     u32 connector_type);
+
+/* CRTC Functions */
+int baikal_vdu_crtc_create(struct baikal_vdu_private *priv);
+
+irqreturn_t baikal_vdu_irq(int irq, void *data);
+
+int baikal_vdu_primary_plane_init(struct baikal_vdu_private *priv);
+
+/* Backlight Functions */
+int baikal_vdu_backlight_create(struct drm_device *drm);
+
+/* Debugfs functions */
+int baikal_vdu_hdmi_debugfs_init(struct drm_minor *minor);
+
+int baikal_vdu_lvds_debugfs_init(struct drm_minor *minor);
+
+#endif /* __BAIKAL_VDU_DRM_H__ */
diff --git a/drivers/gpu/drm/baikal/baikal_vdu_drv.c b/drivers/gpu/drm/baikal/baikal_vdu_drv.c
new file mode 100644
index 0000000000000..a2df4e1147dff
--- /dev/null
+++ b/drivers/gpu/drm/baikal/baikal_vdu_drv.c
@@ -0,0 +1,482 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019-2023 Baikal Electronics, JSC
+ *
+ * Author: Pavel Parkhomenko <Pavel.Parkhomenko@baikalelectronics.ru>
+ * Bugfixes by Alexey Sheplyakov <asheplyakov@altlinux.org>
+ *
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/irq.h>
+#include <linux/clk.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/of_graph.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
+#include <drm/drm_connector.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_irq.h>
+#include <drm/drm_of.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_probe_helper.h>
+
+#include "baikal_vdu_drm.h"
+#include "baikal_vdu_regs.h"
+
+#define DRIVER_NAME		"baikal-vdu"
+#define DRIVER_DESC		"Baikal VDU DRM driver"
+#define DRIVER_DATE		"20230221"
+
+#define BAIKAL_SMC_LOG_DISABLE	0x82000200
+#define IFIFO_SIZE	16384
+#define UHD_FIFO_SIZE	16384
+
+int mode_override = 0;
+int hdmi_off = 0;
+int lvds_off = 0;
+
+static struct drm_mode_config_funcs mode_config_funcs = {
+	.fb_create = drm_gem_fb_create,
+	.atomic_check = drm_atomic_helper_check,
+	.atomic_commit = drm_atomic_helper_commit,
+};
+
+const struct drm_encoder_funcs baikal_vdu_encoder_funcs = {
+	.destroy = drm_encoder_cleanup,
+};
+
+static struct drm_bridge *devm_baikal_of_get_bridge(struct device *dev,
+					  struct device_node *np,
+					  u32 port, u32 endpoint)
+{
+	struct drm_bridge *bridge;
+	struct drm_panel *panel;
+	int ret;
+
+	ret = drm_of_find_panel_or_bridge(np, port, endpoint,
+					  &panel, &bridge);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (panel) {
+		bridge = devm_baikal_lvds_bridge_add(dev, panel, DRM_MODE_CONNECTOR_LVDS);
+	}
+
+	return bridge;
+}
+
+static int baikal_vdu_remove_efifb(struct drm_device *dev)
+{
+	int err;
+	err = drm_fb_helper_remove_conflicting_framebuffers(NULL,
+							    "baikal-vdudrmfb",
+							    false);
+	if (err)
+		dev_warn(dev->dev, "failed to remove firmware framebuffer\n");
+	return err;
+}
+
+inline void baikal_vdu_switch_off(struct baikal_vdu_private *priv)
+{
+	u32 cntl = readl(priv->regs + CR1);
+	cntl &= ~CR1_LCE;
+	writel(cntl, priv->regs + CR1);
+}
+
+inline void baikal_vdu_switch_on(struct baikal_vdu_private *priv)
+{
+	u32 cntl = readl(priv->regs + CR1);
+	cntl |= CR1_LCE;
+	writel(cntl, priv->regs + CR1);
+}
+
+static int baikal_vdu_modeset_init(struct baikal_vdu_private *priv)
+{
+	struct drm_device *dev = priv->drm;
+	struct drm_encoder *encoder;
+	int ret = 0;
+	int index;
+
+	if (priv == NULL)
+		return -EINVAL;
+	index = priv->index;
+
+	ret = baikal_vdu_primary_plane_init(priv);
+	if (ret != 0) {
+		dev_err(dev->dev, "%s: failed to init primary plane\n", priv->name);
+		return ret;
+	}
+
+	ret = baikal_vdu_crtc_create(priv);
+	if (ret) {
+		dev_err(dev->dev, "%s: failed to create CRTC\n", priv->name);
+		return ret;
+	}
+
+	if (priv->bridge) {
+		encoder = &priv->encoder;
+		ret = drm_encoder_init(dev, encoder, &baikal_vdu_encoder_funcs,
+				       DRM_MODE_ENCODER_NONE, NULL);
+		if (ret) {
+			dev_err(dev->dev, "%s: failed to create DRM encoder\n", priv->name);
+			return ret;
+		}
+		encoder->crtc = &priv->crtc;
+		encoder->possible_crtcs = BIT(drm_crtc_index(encoder->crtc));
+		priv->bridge->encoder = encoder;
+		encoder->bridge = priv->bridge;
+		ret = drm_bridge_attach(encoder, priv->bridge, NULL);
+		if (ret) {
+			dev_err(dev->dev, "%s: failed to attach DRM bridge (%d)\n", priv->name, ret);
+			return ret;
+		}
+	} else {
+		dev_err(dev->dev, "%s: no bridge or panel attached\n", priv->name);
+		return -ENODEV;
+	}
+
+	priv->mode_override = mode_override;
+
+	return ret;
+}
+
+static int baikal_dumb_create(struct drm_file *file, struct drm_device *dev,
+			     struct drm_mode_create_dumb *args)
+{
+	args->pitch = DIV_ROUND_UP(args->width * args->bpp, 8);
+	args->size = args->pitch * args->height + IFIFO_SIZE + UHD_FIFO_SIZE;
+
+	return drm_gem_cma_dumb_create_internal(file, dev, args);
+}
+
+static const struct file_operations drm_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.release = drm_release,
+	.unlocked_ioctl = drm_ioctl,
+	.mmap = drm_gem_cma_mmap,
+	.poll = drm_poll,
+	.read = drm_read,
+};
+
+static struct drm_driver vdu_drm_driver = {
+	.driver_features = DRIVER_GEM |	DRIVER_MODESET | DRIVER_ATOMIC,
+	.lastclose = drm_fb_helper_lastclose,
+	.ioctls = NULL,
+	.fops = &drm_fops,
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = 2,
+	.minor = 0,
+	.patchlevel = 0,
+	.dumb_create = baikal_dumb_create,
+	.dumb_destroy = drm_gem_dumb_destroy,
+	.dumb_map_offset = drm_gem_dumb_map_offset,
+	.gem_free_object_unlocked = drm_gem_cma_free_object,
+	.gem_print_info = drm_gem_cma_print_info,
+	.gem_vm_ops = &drm_gem_cma_vm_ops,
+
+	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+	.gem_prime_import = drm_gem_prime_import,
+	.gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
+	.gem_prime_export = drm_gem_prime_export,
+	.gem_prime_get_sg_table	= drm_gem_cma_prime_get_sg_table,
+	.gem_prime_mmap = drm_gem_cma_prime_mmap,
+	.gem_prime_vmap = drm_gem_cma_prime_vmap,
+};
+
+static int baikal_vdu_allocate_resources(struct platform_device *pdev,
+		struct baikal_vdu_private *priv)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *mem;
+	int ret;
+
+	if (!(mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, priv->regs_name))) {
+		dev_err(dev, "%s %s: no MMIO resource specified\n", __func__, priv->name);
+		return -EINVAL;
+	}
+
+	priv->regs = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(priv->regs)) {
+		dev_err(dev, "%s %s: MMIO allocation failed\n", __func__, priv->name);
+		return PTR_ERR(priv->regs);
+	}
+
+	if (priv->off) {
+		baikal_vdu_switch_off(priv);
+		return -EPERM;
+	} else {
+		ret = baikal_vdu_modeset_init(priv);
+		if (ret) {
+			dev_err(dev, "%s %s: failed to init modeset\n", __func__, priv->name);
+			if (ret == -ENODEV) {
+				baikal_vdu_switch_off(priv);
+			}
+			return ret;
+		} else {
+			writel(MRR_MAX_VALUE, priv->regs + MRR);
+			spin_lock_init(&priv->lock);
+			return 0;
+		}
+	}
+}
+
+static int baikal_vdu_allocate_irq(struct platform_device *pdev,
+	       struct baikal_vdu_private *priv)
+{
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	if (!(priv->irq = platform_get_irq_byname(pdev, priv->irq_name))) {
+		dev_err(dev, "%s %s: no IRQ resource specified\n", __func__, priv->name);
+		return -EINVAL;
+	}
+
+	/* turn off interrupts before requesting the irq */
+	writel(0, priv->regs + IMR);
+	ret = request_irq(priv->irq, baikal_vdu_irq, IRQF_SHARED, dev->driver->name, priv);
+	if (ret != 0)
+		dev_err(dev, "%s %s: IRQ %d allocation failed\n", __func__, priv->name, priv->irq);
+	return ret;
+}
+
+static void baikal_vdu_free_irq(struct baikal_vdu_private *priv)
+{
+	writel(0, priv->regs + IMR);
+	writel(0x3ffff, priv->regs + ISR);
+	free_irq(priv->irq, priv->drm->dev);
+}
+
+static int baikal_vdu_allocate_clk(struct baikal_vdu_private *priv)
+{
+	priv->clk = clk_get(priv->drm->dev, priv->pclk_name);
+	if (IS_ERR(priv->clk)) {
+		dev_err(priv->drm->dev, "%s: unable to get %s, err %ld\n", priv->name, priv->pclk_name, PTR_ERR(priv->clk));
+		return PTR_ERR(priv->clk);
+	} else
+		return 0;
+}
+
+static void baikal_vdu_set_name(struct baikal_vdu_private *priv, int index, const char *name)
+{
+	char *c;
+	int len = sizeof(priv->name) / sizeof(priv->name[0]) - 1;
+	strncpy(priv->name, name, len);
+	for (c = priv->name; c < priv->name + len && *c; c++) {
+		*c = toupper(*c);
+	}
+	sprintf(priv->irq_name, "%s_irq", name);
+	sprintf(priv->pclk_name, "%s_pclk", name);
+	sprintf(priv->regs_name, "%s_regs", name);
+	priv->index = index;
+}
+
+static int baikal_vdu_bridge_init(struct baikal_vdu_private *priv, struct drm_device *drm) {
+	int ret = 0;
+	struct device *dev;
+	struct drm_bridge *bridge;
+	if (!priv || !drm)
+		return -ENODEV;
+	priv->drm = drm;
+	dev = drm->dev;
+	bridge = devm_baikal_of_get_bridge(dev, dev->of_node, priv->index, 0);
+	if (IS_ERR(bridge)) {
+		ret = PTR_ERR(bridge);
+		if (ret == -EPROBE_DEFER) {
+			dev_info(dev, "%s: bridge probe deferred\n", priv->name);
+		}
+		priv->bridge = NULL;
+	} else {
+		priv->bridge = bridge;
+	}
+	return ret;
+}
+
+static int baikal_vdu_resources_init(struct platform_device *pdev, struct baikal_vdu_private *priv)
+{
+	int ret = baikal_vdu_allocate_resources(pdev, priv);
+	if (ret)
+		return 0;
+	ret = baikal_vdu_allocate_irq(pdev, priv);
+	if (ret)
+		return 0;
+	ret = baikal_vdu_allocate_clk(priv);
+	if (ret) {
+		baikal_vdu_free_irq(priv);
+		return 0;
+	} else {
+		return 1;
+	}
+}
+
+static int baikal_vdu_drm_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct baikal_lvds_bridge *panel_bridge;
+	struct baikal_vdu_crossbar *crossbar;
+	struct baikal_vdu_private *hdmi;
+	struct baikal_vdu_private *lvds;
+	struct drm_device *drm;
+	struct drm_mode_config *mode_config;
+	struct arm_smccc_res res;
+	int ret;
+
+	crossbar = devm_kzalloc(dev, sizeof(*crossbar), GFP_KERNEL);
+	if (!crossbar)
+		return -ENOMEM;
+
+	drm = drm_dev_alloc(&vdu_drm_driver, dev);
+	if (IS_ERR(drm))
+		return PTR_ERR(drm);
+
+	platform_set_drvdata(pdev, drm);
+	crossbar->drm = drm;
+	drm->dev_private = crossbar;
+	hdmi = &crossbar->hdmi;
+	baikal_vdu_set_name(hdmi, CRTC_HDMI, "hdmi");
+	lvds = &crossbar->lvds;
+	baikal_vdu_set_name(lvds, CRTC_LVDS, "lvds");
+
+	ret = baikal_vdu_bridge_init(hdmi, drm);
+	if (ret == -EPROBE_DEFER) {
+		goto out_drm;
+	}
+
+	ret = of_property_read_u32(pdev->dev.of_node, "lvds-lanes",
+				   &lvds->num_lanes);
+	if (ret)
+		lvds->num_lanes = 0;
+	if (lvds->num_lanes) {
+		ret = baikal_vdu_bridge_init(lvds, drm);
+		if (ret == -EPROBE_DEFER) {
+			goto out_drm;
+		}
+	}
+
+	drm_mode_config_init(drm);
+	mode_config = &drm->mode_config;
+	mode_config->funcs = &mode_config_funcs;
+	mode_config->min_width = 1;
+	mode_config->max_width = 8192;
+	mode_config->min_height = 1;
+	mode_config->max_height = 8192;
+
+	hdmi->off = hdmi_off;
+	hdmi->ready = baikal_vdu_resources_init(pdev, hdmi);
+	if (lvds->num_lanes) {
+		lvds->off = lvds_off;
+		lvds->ready = baikal_vdu_resources_init(pdev, lvds);
+	} else {
+		lvds->ready = 0;
+		lvds->bridge = NULL;
+		dev_info(dev, "No 'lvds-lanes' property found\n");
+	}
+	if (lvds->ready) {
+		ret = baikal_vdu_backlight_create(drm);
+		if (ret) {
+			dev_err(dev, "LVDS: failed to create backlight\n");
+		}
+		if (bridge_is_baikal_lvds_bridge(lvds->bridge)) {
+			panel_bridge = bridge_to_baikal_lvds_bridge(lvds->bridge);
+			panel_bridge->vdu = lvds;
+		} else {
+		// TODO implement handling of third-party bridges
+		}
+	}
+	if (hdmi->bridge) {
+		// TODO implement functions specific to HDMI bridge
+	}
+
+	hdmi->ready = hdmi->ready & !hdmi->off;
+	lvds->ready = lvds->ready & !lvds->off;
+	dev_info(dev, "%s output %s\n", hdmi->name, hdmi->ready ? "enabled" : "disabled");
+	dev_info(dev, "%s output %s\n", lvds->name, lvds->ready ? "enabled" : "disabled");
+	baikal_vdu_remove_efifb(drm);
+
+	if (hdmi->ready || lvds->ready) {
+		/* Disable SCP debug output as it may affect VDU performance */
+		arm_smccc_smc(BAIKAL_SMC_LOG_DISABLE, 0, 0, 0, 0, 0, 0, 0, &res);
+
+		drm_mode_config_reset(drm);
+		drm_kms_helper_poll_init(drm);
+		ret = drm_dev_register(drm, 0);
+		if (ret) {
+			dev_err(dev, "failed to register DRM device\n");
+			goto out_config;
+		}
+		drm_fbdev_generic_setup(drm, 32);
+#if defined(CONFIG_DEBUG_FS)
+		if (hdmi->ready)
+			baikal_vdu_hdmi_debugfs_init(drm->primary);
+		if (lvds->ready)
+			baikal_vdu_lvds_debugfs_init(drm->primary);
+#endif
+		return 0;
+	} else {
+		dev_err(dev, "no active outputs configured\n");
+		ret = -ENODEV;
+	}
+out_config:
+	drm_mode_config_cleanup(drm);
+out_drm:
+	drm->dev_private = NULL;
+	drm_dev_put(drm);
+	dev_err(dev, "failed to probe: %d\n", ret);
+	return ret;
+}
+
+static int baikal_vdu_drm_remove(struct platform_device *pdev)
+{
+	struct drm_device *drm = platform_get_drvdata(pdev);
+
+	drm_dev_unregister(drm);
+	drm_mode_config_cleanup(drm);
+	drm_irq_uninstall(drm);
+	drm->dev_private = NULL;
+	drm_dev_put(drm);
+
+	return 0;
+}
+
+static const struct of_device_id baikal_vdu_of_match[] = {
+	{ .compatible = "baikal,vdu" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, baikal_vdu_of_match);
+
+static struct platform_driver baikal_vdu_platform_driver = {
+	.probe  = baikal_vdu_drm_probe,
+	.remove = baikal_vdu_drm_remove,
+	.driver = {
+		.name   = DRIVER_NAME,
+		.of_match_table = baikal_vdu_of_match,
+	},
+};
+
+module_param(mode_override, int, 0644);
+module_param(hdmi_off, int, 0644);
+module_param(lvds_off, int, 0644);
+
+module_platform_driver(baikal_vdu_platform_driver);
+
+MODULE_AUTHOR("Pavel Parkhomenko <Pavel.Parkhomenko@baikalelectronics.ru>");
+MODULE_DESCRIPTION("Baikal Electronics BE-M1000 Video Display Unit (VDU) DRM Driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" DRIVER_NAME);
+MODULE_SOFTDEP("pre: baikal_hdmi");
diff --git a/drivers/gpu/drm/baikal/baikal_vdu_panel.c b/drivers/gpu/drm/baikal/baikal_vdu_panel.c
new file mode 100644
index 0000000000000..bbb198ab4d7a7
--- /dev/null
+++ b/drivers/gpu/drm/baikal/baikal_vdu_panel.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Baikal Electronics, JSC
+ *
+ * Author: Pavel Parkhomenko <Pavel.Parkhomenko@baikalelectronics.ru>
+ *
+ */
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_connector.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_probe_helper.h>
+
+#include "baikal_vdu_drm.h"
+#include "baikal_vdu_regs.h"
+
+static void baikal_lvds_connector_force(struct drm_connector *connector)
+{
+	struct baikal_lvds_bridge *bridge = connector_to_baikal_lvds_bridge(connector);
+	struct baikal_vdu_private *priv = bridge->vdu;
+	u32 cntl = readl(priv->regs + CR1);
+	if (connector->force == DRM_FORCE_OFF)
+		cntl &= ~CR1_LCE;
+	else
+		cntl |= CR1_LCE;
+	writel(cntl, priv->regs + CR1);
+}
+
+static int baikal_lvds_connector_get_modes(struct drm_connector *connector)
+{
+	struct baikal_lvds_bridge *panel_bridge =
+		connector_to_baikal_lvds_bridge(connector);
+
+	return drm_panel_get_modes(panel_bridge->panel);
+}
+
+static const struct drm_connector_helper_funcs
+baikal_lvds_bridge_connector_helper_funcs = {
+	.get_modes = baikal_lvds_connector_get_modes,
+};
+
+static const struct drm_connector_funcs baikal_lvds_bridge_connector_funcs = {
+	.reset = drm_atomic_helper_connector_reset,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = drm_connector_cleanup,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+	.force = baikal_lvds_connector_force,
+};
+
+static int baikal_lvds_bridge_attach(struct drm_bridge *bridge)
+{
+	struct baikal_lvds_bridge *panel_bridge = bridge_to_baikal_lvds_bridge(bridge);
+	struct drm_connector *connector = &panel_bridge->connector;
+	int ret;
+
+	if (!bridge->encoder) {
+		DRM_ERROR("Missing encoder\n");
+		return -ENODEV;
+	}
+
+	drm_connector_helper_add(connector,
+				 &baikal_lvds_bridge_connector_helper_funcs);
+
+	ret = drm_connector_init(bridge->dev, connector,
+				 &baikal_lvds_bridge_connector_funcs,
+				 panel_bridge->connector_type);
+	if (ret) {
+		DRM_ERROR("Failed to initialize connector\n");
+		return ret;
+	}
+
+	drm_connector_attach_encoder(&panel_bridge->connector,
+					  bridge->encoder);
+
+	ret = drm_panel_attach(panel_bridge->panel, &panel_bridge->connector);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static void baikal_lvds_bridge_detach(struct drm_bridge *bridge)
+{
+	struct baikal_lvds_bridge *panel_bridge = bridge_to_baikal_lvds_bridge(bridge);
+
+	drm_panel_detach(panel_bridge->panel);
+}
+
+static void baikal_lvds_bridge_pre_enable(struct drm_bridge *bridge)
+{
+	struct baikal_lvds_bridge *panel_bridge = bridge_to_baikal_lvds_bridge(bridge);
+
+	baikal_vdu_switch_on(panel_bridge->vdu);
+	drm_panel_prepare(panel_bridge->panel);
+}
+
+static void baikal_lvds_bridge_enable(struct drm_bridge *bridge)
+{
+	struct baikal_lvds_bridge *panel_bridge = bridge_to_baikal_lvds_bridge(bridge);
+
+	drm_panel_enable(panel_bridge->panel);
+}
+
+static void baikal_lvds_bridge_disable(struct drm_bridge *bridge)
+{
+	struct baikal_lvds_bridge *panel_bridge = bridge_to_baikal_lvds_bridge(bridge);
+
+	drm_panel_disable(panel_bridge->panel);
+}
+
+static void baikal_lvds_bridge_post_disable(struct drm_bridge *bridge)
+{
+	struct baikal_lvds_bridge *panel_bridge = bridge_to_baikal_lvds_bridge(bridge);
+
+	drm_panel_unprepare(panel_bridge->panel);
+	baikal_vdu_switch_off(panel_bridge->vdu);
+}
+
+static const struct drm_bridge_funcs baikal_lvds_bridge_funcs = {
+	.attach = baikal_lvds_bridge_attach,
+	.detach = baikal_lvds_bridge_detach,
+	.pre_enable = baikal_lvds_bridge_pre_enable,
+	.enable = baikal_lvds_bridge_enable,
+	.disable = baikal_lvds_bridge_disable,
+	.post_disable = baikal_lvds_bridge_post_disable,
+};
+
+static struct drm_bridge *baikal_lvds_bridge_add(struct drm_panel *panel,
+					u32 connector_type)
+{
+	struct baikal_lvds_bridge *panel_bridge;
+
+	if (!panel)
+		return ERR_PTR(-EINVAL);
+
+	panel_bridge = devm_kzalloc(panel->dev, sizeof(*panel_bridge),
+				    GFP_KERNEL);
+	if (!panel_bridge)
+		return ERR_PTR(-ENOMEM);
+
+	panel_bridge->connector_type = connector_type;
+	panel_bridge->panel = panel;
+
+	panel_bridge->bridge.funcs = &baikal_lvds_bridge_funcs;
+#ifdef CONFIG_OF
+	panel_bridge->bridge.of_node = panel->dev->of_node;
+#endif
+
+	drm_bridge_add(&panel_bridge->bridge);
+
+	return &panel_bridge->bridge;
+}
+
+static void baikal_lvds_bridge_remove(struct drm_bridge *bridge)
+{
+	struct baikal_lvds_bridge *panel_bridge;
+
+	if (!bridge)
+		return;
+
+	if (bridge->funcs != &baikal_lvds_bridge_funcs)
+		return;
+
+	panel_bridge = bridge_to_baikal_lvds_bridge(bridge);
+
+	drm_bridge_remove(bridge);
+	devm_kfree(panel_bridge->panel->dev, bridge);
+}
+
+static void devm_baikal_lvds_bridge_release(struct device *dev, void *res)
+{
+	struct drm_bridge **bridge = res;
+
+	baikal_lvds_bridge_remove(*bridge);
+}
+
+struct drm_bridge *devm_baikal_lvds_bridge_add(struct device *dev,
+					     struct drm_panel *panel,
+					     u32 connector_type)
+{
+	struct drm_bridge **ptr, *bridge;
+
+	ptr = devres_alloc(devm_baikal_lvds_bridge_release, sizeof(*ptr),
+			   GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	bridge = baikal_lvds_bridge_add(panel, connector_type);
+	if (!IS_ERR(bridge)) {
+		*ptr = bridge;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return bridge;
+}
+
+bool bridge_is_baikal_lvds_bridge(const struct drm_bridge *bridge)
+{
+	return bridge && (bridge->funcs == &baikal_lvds_bridge_funcs);
+}
diff --git a/drivers/gpu/drm/baikal/baikal_vdu_plane.c b/drivers/gpu/drm/baikal/baikal_vdu_plane.c
new file mode 100644
index 0000000000000..40e63c4adbacb
--- /dev/null
+++ b/drivers/gpu/drm/baikal/baikal_vdu_plane.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019-2023 Baikal Electronics, JSC
+ *
+ * Author: Pavel Parkhomenko <Pavel.Parkhomenko@baikalelectronics.ru>
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/of_graph.h>
+
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_plane_helper.h>
+
+#include "baikal_vdu_drm.h"
+#include "baikal_vdu_regs.h"
+
+static void baikal_vdu_primary_plane_atomic_update(struct drm_plane *plane,
+					      struct drm_plane_state *old_state)
+{
+	struct baikal_vdu_private *priv;
+	struct drm_plane_state *state = plane->state;
+	struct drm_crtc *crtc = state->crtc;
+	struct drm_framebuffer *fb = state->fb;
+	uint32_t cntl;
+	uint32_t addr;
+	unsigned long flags;
+
+	if (!fb)
+		return;
+
+	priv = crtc_to_baikal_vdu(crtc);
+	addr = drm_fb_cma_get_gem_addr(fb, state, 0);
+
+	spin_lock_irqsave(&priv->lock, flags);
+	writel(addr, priv->regs + DBAR);
+	spin_unlock_irqrestore(&priv->lock, flags);
+	priv->counters[16]++;
+
+	cntl = readl(priv->regs + CR1);
+	cntl &= ~CR1_BPP_MASK;
+
+	/* Note that the the hardware's format reader takes 'r' from
+	 * the low bit, while DRM formats list channels from high bit
+	 * to low bit as you read left to right.
+	 */
+	switch (fb->format->format) {
+	case DRM_FORMAT_BGR888:
+		cntl |= CR1_BPP24 | CR1_FBP | CR1_BGR;
+		break;
+	case DRM_FORMAT_RGB888:
+		cntl |= CR1_BPP24 | CR1_FBP;
+		break;
+	case DRM_FORMAT_ABGR8888:
+	case DRM_FORMAT_XBGR8888:
+		cntl |= CR1_BPP24 | CR1_BGR;
+		break;
+	case DRM_FORMAT_ARGB8888:
+	case DRM_FORMAT_XRGB8888:
+		cntl |= CR1_BPP24;
+		break;
+	case DRM_FORMAT_BGR565:
+		cntl |= CR1_BPP16_565 | CR1_BGR;
+		break;
+	case DRM_FORMAT_RGB565:
+		cntl |= CR1_BPP16_565;
+		break;
+	case DRM_FORMAT_ABGR1555:
+	case DRM_FORMAT_XBGR1555:
+		cntl |= CR1_BPP16_555 | CR1_BGR;
+		break;
+	case DRM_FORMAT_ARGB1555:
+	case DRM_FORMAT_XRGB1555:
+		cntl |= CR1_BPP16_555;
+		break;
+	default:
+		WARN_ONCE(true, "Unknown FB format 0x%08x, set XRGB8888 instead\n",
+				fb->format->format);
+		cntl |= CR1_BPP24;
+		break;
+	}
+
+	writel(cntl, priv->regs + CR1);
+}
+
+static const struct drm_plane_helper_funcs baikal_vdu_primary_plane_helper_funcs = {
+	.atomic_update = baikal_vdu_primary_plane_atomic_update,
+	.prepare_fb = drm_gem_fb_prepare_fb,
+};
+
+static const struct drm_plane_funcs baikal_vdu_primary_plane_funcs = {
+	.update_plane = drm_atomic_helper_update_plane,
+	.disable_plane = drm_atomic_helper_disable_plane,
+	.reset = drm_atomic_helper_plane_reset,
+	.destroy = drm_plane_cleanup,
+	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
+};
+
+int baikal_vdu_primary_plane_init(struct baikal_vdu_private *priv)
+{
+	struct drm_device *drm = priv->drm;
+	struct drm_plane *plane = &priv->primary;
+	static const u32 formats[] = {
+		DRM_FORMAT_BGR888,
+		DRM_FORMAT_RGB888,
+		DRM_FORMAT_ABGR8888,
+		DRM_FORMAT_XBGR8888,
+		DRM_FORMAT_ARGB8888,
+		DRM_FORMAT_XRGB8888,
+		DRM_FORMAT_BGR565,
+		DRM_FORMAT_RGB565,
+		DRM_FORMAT_ABGR1555,
+		DRM_FORMAT_XBGR1555,
+		DRM_FORMAT_ARGB1555,
+		DRM_FORMAT_XRGB1555,
+	};
+	int ret;
+
+	ret = drm_universal_plane_init(drm, plane, 0,
+				       &baikal_vdu_primary_plane_funcs,
+				       formats,
+				       ARRAY_SIZE(formats),
+				       NULL,
+				       DRM_PLANE_TYPE_PRIMARY,
+				       NULL);
+	if (ret)
+		return ret;
+
+	drm_plane_helper_add(plane, &baikal_vdu_primary_plane_helper_funcs);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/baikal/baikal_vdu_regs.h b/drivers/gpu/drm/baikal/baikal_vdu_regs.h
new file mode 100644
index 0000000000000..d48ea8d12051f
--- /dev/null
+++ b/drivers/gpu/drm/baikal/baikal_vdu_regs.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019-2023 Baikal Electronics, JSC
+ *
+ * Author: Pavel Parkhomenko <Pavel.Parkhomenko@baikalelectronics.ru>
+ *
+ */
+
+#ifndef __BAIKAL_VDU_REGS_H__
+#define __BAIKAL_VDU_REGS_H__
+
+#define CR1         0x000
+#define HTR         0x008
+#define VTR1        0x00C
+#define VTR2        0x010
+#define PCTR        0x014
+#define ISR         0x018
+#define IMR         0x01C
+#define IVR         0x020
+#define ISCR        0x024
+#define DBAR        0x028
+#define DCAR        0x02C
+#define DEAR        0x030
+#define PWMFR       0x034
+#define PWMDCR      0x038
+#define HVTER       0x044
+#define HPPLOR      0x048
+#define GPIOR       0x1F8
+#define MRR         0xFFC
+
+#define INTR_UFU    BIT(16)
+#define INTR_BAU    BIT(7)
+#define INTR_VCT    BIT(6)
+#define INTR_MBE    BIT(5)
+#define INTR_FER    BIT(4)
+#define INTR_IFO    BIT(3)
+#define INTR_OFU    BIT(0)
+
+#define CR1_FBP             BIT(19)
+#define CR1_FDW_MASK        GENMASK(17, 16)
+#define CR1_FDW_4_WORDS     (0 << 16)
+#define CR1_FDW_8_WORDS     (1 << 16)
+#define CR1_FDW_16_WORDS    (2 << 16)
+#define CR1_OPS_MASK        GENMASK(14, 12)
+#define CR1_OPS_LCD18       (0 << 13)
+#define CR1_OPS_LCD24       (1 << 13)
+#define CR1_OPS_565         (0 << 12)
+#define CR1_OPS_555         (1 << 12)
+#define CR1_VSP             BIT(11)
+#define CR1_HSP             BIT(10)
+#define CR1_DEP             BIT(8)
+#define CR1_BGR             BIT(5)
+#define CR1_BPP_MASK        GENMASK(4, 2)
+#define CR1_BPP1            (0 << 2)
+#define CR1_BPP2            (1 << 2)
+#define CR1_BPP4            (2 << 2)
+#define CR1_BPP8            (3 << 2)
+#define CR1_BPP16           (4 << 2)
+#define CR1_BPP18           (5 << 2)
+#define CR1_BPP24           (6 << 2)
+#define CR1_LCE             BIT(0)
+
+#define CR1_BPP16_555 ((CR1_BPP16) | (CR1_OPS_555))
+#define CR1_BPP16_565 ((CR1_BPP16) | (CR1_OPS_565))
+
+#define VTR1_VBP_MASK       GENMASK(23, 16)
+#define VTR1_VBP(x)         ((x) << 16)
+#define VTR1_VBP_LSB_WIDTH  8
+#define VTR1_VFP_MASK       GENMASK(15, 8)
+#define VTR1_VFP(x)         ((x) << 8)
+#define VTR1_VFP_LSB_WIDTH  8
+#define VTR1_VSW_MASK       GENMASK(7, 0)
+#define VTR1_VSW(x)         ((x) << 0)
+#define VTR1_VSW_LSB_WIDTH  8
+
+#define VTR2_LPP_MASK       GENMASK(11, 0)
+
+#define HTR_HSW_MASK        GENMASK(31, 24)
+#define HTR_HSW(x)          ((x) << 24)
+#define HTR_HSW_LSB_WIDTH   8
+#define HTR_HBP_MASK        GENMASK(23, 16)
+#define HTR_HBP(x)          ((x) << 16)
+#define HTR_HBP_LSB_WIDTH   8
+#define HTR_PPL_MASK        GENMASK(15, 8)
+#define HTR_PPL(x)          ((x) << 8)
+#define HTR_HFP_MASK        GENMASK(7, 0)
+#define HTR_HFP(x)          ((x) << 0)
+#define HTR_HFP_LSB_WIDTH   8
+
+#define PCTR_PCI2           BIT(11)
+#define PCTR_PCR            BIT(10)
+#define PCTR_PCI            BIT(9)
+#define PCTR_PCB            BIT(8)
+#define PCTR_PCD_MASK       GENMASK(7, 0)
+#define PCTR_MAX_PCD        128
+
+#define ISCR_VSC_OFF        0x0
+#define ISCR_VSC_VSW        0x4
+#define ISCR_VSC_VBP        0x5
+#define ISCR_VSC_VACTIVE    0x6
+#define ISCR_VSC_VFP        0x7
+
+#define PWMFR_PWMPCR        BIT(24)
+#define PWMFR_PWMFCI        BIT(23)
+#define PWMFR_PWMFCE        BIT(22)
+#define PWMFR_PWMFCD_MASK   GENMASK(21, 0)
+#define PWMFR_PWMFCD(x)     ((x) << 0)
+
+#define HVTER_VSWE_MASK     GENMASK(25, 24)
+#define HVTER_VSWE(x)       ((x) << 24)
+#define HVTER_HSWE_MASK     GENMASK(17, 16)
+#define HVTER_HSWE(x)       ((x) << 16)
+#define HVTER_VBPE_MASK     GENMASK(13, 12)
+#define HVTER_VBPE(x)       ((x) << 12)
+#define HVTER_VFPE_MASK     GENMASK(9, 8)
+#define HVTER_VFPE(x)       ((x) << 8)
+#define HVTER_HBPE_MASK     GENMASK(5, 4)
+#define HVTER_HBPE(x)       ((x) << 4)
+#define HVTER_HFPE_MASK     GENMASK(1, 0)
+#define HVTER_HFPE(x)       ((x) << 0)
+
+#define HPPLOR_HPOE         BIT(31)
+#define HPPLOR_HPPLO_MASK   GENMASK(11, 0)
+#define HPPLOR_HPPLO(x)     ((x) << 0)
+
+#define GPIOR_UHD_MASK      GENMASK(23, 16)
+#define GPIOR_UHD_SNGL_PORT (0 << 18)
+#define GPIOR_UHD_DUAL_PORT (1 << 18)
+#define GPIOR_UHD_QUAD_PORT (2 << 18)
+#define GPIOR_UHD_ENB       BIT(17)
+
+#define MRR_DEAR_MRR_MASK   GENMASK(31, 3)
+#define MRR_OUTSTND_RQ_MASK GENMASK(2, 0)
+#define MRR_OUTSTND_RQ(x)   ((x >> 1) << 0)
+#define MRR_MAX_VALUE       ((0xffffffff & MRR_DEAR_MRR_MASK) | MRR_OUTSTND_RQ(4))
+
+#endif /* __BAIKAL_VDU_REGS_H__ */
diff --git a/drivers/gpu/drm/bridge/synopsys/Kconfig b/drivers/gpu/drm/bridge/synopsys/Kconfig
index 21a1be3ced0f3..06814ba3f8b08 100644
--- a/drivers/gpu/drm/bridge/synopsys/Kconfig
+++ b/drivers/gpu/drm/bridge/synopsys/Kconfig
@@ -39,3 +39,20 @@ config DRM_DW_MIPI_DSI
 	select DRM_KMS_HELPER
 	select DRM_MIPI_DSI
 	select DRM_PANEL_BRIDGE
+
+config DRM_BAIKAL_HDMI
+	tristate "Baikal-M HDMI transmitter"
+	select DRM_DW_HDMI
+	help
+	  Choose this if you want to use HDMI on Baikal-M.
+
+config DRM_BAIKAL_HDMI_AHB_AUDIO
+	tristate "Baikal-M HDMI Audio interface"
+	depends on DRM_BAIKAL_HDMI && SND
+	select SND_PCM
+	select SND_PCM_ELD
+	select SND_PCM_IEC958
+	help
+	  Support the AHB Audio interface which is part of the
+	  Baikal-M HDMI Tx block.
+
diff --git a/drivers/gpu/drm/bridge/synopsys/Makefile b/drivers/gpu/drm/bridge/synopsys/Makefile
index 91d746ad5de12..64e6637d16b38 100644
--- a/drivers/gpu/drm/bridge/synopsys/Makefile
+++ b/drivers/gpu/drm/bridge/synopsys/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_DRM_DW_HDMI) += dw-hdmi.o
 obj-$(CONFIG_DRM_DW_HDMI_AHB_AUDIO) += dw-hdmi-ahb-audio.o
+obj-$(CONFIG_DRM_BAIKAL_HDMI_AHB_AUDIO) += baikal-hdmi-ahb-audio.o
 obj-$(CONFIG_DRM_DW_HDMI_I2S_AUDIO) += dw-hdmi-i2s-audio.o
 obj-$(CONFIG_DRM_DW_HDMI_CEC) += dw-hdmi-cec.o
 
diff --git a/drivers/gpu/drm/bridge/synopsys/baikal-hdmi-ahb-audio.c b/drivers/gpu/drm/bridge/synopsys/baikal-hdmi-ahb-audio.c
new file mode 100644
index 0000000000000..9717cd768b7b3
--- /dev/null
+++ b/drivers/gpu/drm/bridge/synopsys/baikal-hdmi-ahb-audio.c
@@ -0,0 +1,684 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Baikal Electronics BE-M1000 DesignWare HDMI AHB audio driver
+ *
+ * Copyright (C) 2020 Baikal Electronics, JSC
+ *
+ * Author: Pavel Parkhomenko <Pavel.Parkhomenko@baikalelectronics.ru>
+ *
+ * Parts of this file were based on sources as follows:
+ *
+ * DesignWare HDMI audio driver
+ *
+ * Written and tested against the Designware HDMI Tx found in iMX6.
+ */
+
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <drm/bridge/dw_hdmi.h>
+#include <drm/drm_edid.h>
+
+#include <sound/asoundef.h>
+#include <sound/core.h>
+#include <sound/initval.h>
+#include <sound/pcm.h>
+#include <sound/pcm_drm_eld.h>
+#include <sound/pcm_iec958.h>
+
+#include "dw-hdmi-audio.h"
+
+#define DRIVER_NAME "dw-hdmi-ahb-audio"
+
+#define BAIKAL_HDMI_REG_SHIFT  2
+
+/* Provide some bits rather than bit offsets */
+enum {
+	HDMI_AHB_DMA_CONF0_SW_FIFO_RST = BIT(7),
+	HDMI_AHB_DMA_CONF0_EN_HLOCK = BIT(3),
+	HDMI_AHB_DMA_START_START = BIT(0),
+	HDMI_AHB_DMA_STOP_STOP = BIT(0),
+	HDMI_IH_MUTE_AHBDMAAUD_STAT0_OVERRUN = BIT(6),
+	HDMI_IH_MUTE_AHBDMAAUD_STAT0_ERROR = BIT(5),
+	HDMI_IH_MUTE_AHBDMAAUD_STAT0_LOST = BIT(4),
+	HDMI_IH_MUTE_AHBDMAAUD_STAT0_RETRY = BIT(3),
+	HDMI_IH_MUTE_AHBDMAAUD_STAT0_DONE = BIT(2),
+	HDMI_IH_MUTE_AHBDMAAUD_STAT0_BUFFFULL = BIT(1),
+	HDMI_IH_MUTE_AHBDMAAUD_STAT0_BUFFEMPTY = BIT(0),
+	HDMI_IH_MUTE_AHBDMAAUD_STAT0_ALL =
+		HDMI_IH_MUTE_AHBDMAAUD_STAT0_OVERRUN |
+		HDMI_IH_MUTE_AHBDMAAUD_STAT0_ERROR |
+		HDMI_IH_MUTE_AHBDMAAUD_STAT0_LOST |
+		HDMI_IH_MUTE_AHBDMAAUD_STAT0_RETRY |
+		HDMI_IH_MUTE_AHBDMAAUD_STAT0_DONE |
+		HDMI_IH_MUTE_AHBDMAAUD_STAT0_BUFFFULL |
+		HDMI_IH_MUTE_AHBDMAAUD_STAT0_BUFFEMPTY,
+	HDMI_IH_AHBDMAAUD_STAT0_OVERRUN = BIT(6),
+	HDMI_IH_AHBDMAAUD_STAT0_ERROR = BIT(5),
+	HDMI_IH_AHBDMAAUD_STAT0_LOST = BIT(4),
+	HDMI_IH_AHBDMAAUD_STAT0_RETRY = BIT(3),
+	HDMI_IH_AHBDMAAUD_STAT0_DONE = BIT(2),
+	HDMI_IH_AHBDMAAUD_STAT0_BUFFFULL = BIT(1),
+	HDMI_IH_AHBDMAAUD_STAT0_BUFFEMPTY = BIT(0),
+	HDMI_IH_AHBDMAAUD_STAT0_ALL =
+		HDMI_IH_AHBDMAAUD_STAT0_OVERRUN |
+		HDMI_IH_AHBDMAAUD_STAT0_ERROR |
+		HDMI_IH_AHBDMAAUD_STAT0_LOST |
+		HDMI_IH_AHBDMAAUD_STAT0_RETRY |
+		HDMI_IH_AHBDMAAUD_STAT0_DONE |
+		HDMI_IH_AHBDMAAUD_STAT0_BUFFFULL |
+		HDMI_IH_AHBDMAAUD_STAT0_BUFFEMPTY,
+	HDMI_AHB_DMA_CONF0_INCR16 = 2 << 1,
+	HDMI_AHB_DMA_CONF0_INCR8 = 1 << 1,
+	HDMI_AHB_DMA_CONF0_INCR4 = 0,
+	HDMI_AHB_DMA_CONF0_BURST_MODE = BIT(0),
+	HDMI_AHB_DMA_MASK_DONE = BIT(7),
+
+	HDMI_REVISION_ID = 0x0001,
+	HDMI_IH_AHBDMAAUD_STAT0 = 0x0109,
+	HDMI_IH_MUTE_AHBDMAAUD_STAT0 = 0x0189,
+	HDMI_FC_AUDICONF2 = 0x1027,
+	HDMI_FC_AUDSCONF = 0x1063,
+	HDMI_FC_AUDSCONF_LAYOUT1 = 1 << 0,
+	HDMI_FC_AUDSCONF_LAYOUT0 = 0 << 0,
+	HDMI_AHB_DMA_CONF0 = 0x3600,
+	HDMI_AHB_DMA_START = 0x3601,
+	HDMI_AHB_DMA_STOP = 0x3602,
+	HDMI_AHB_DMA_THRSLD = 0x3603,
+	HDMI_AHB_DMA_STRADDR0 = 0x3604,
+	HDMI_AHB_DMA_STPADDR0 = 0x3608,
+	HDMI_AHB_DMA_MASK = 0x3614,
+	HDMI_AHB_DMA_POL = 0x3615,
+	HDMI_AHB_DMA_CONF1 = 0x3616,
+	HDMI_AHB_DMA_BUFFPOL = 0x361a,
+};
+
+struct dw_hdmi_channel_conf {
+	u8 conf1;
+	u8 ca;
+};
+
+/*
+ * The default mapping of ALSA channels to HDMI channels and speaker
+ * allocation bits.  Note that we can't do channel remapping here -
+ * channels must be in the same order.
+ *
+ * Mappings for alsa-lib pcm/surround*.conf files:
+ *
+ *		Front	Sur4.0	Sur4.1	Sur5.0	Sur5.1	Sur7.1
+ * Channels	2	4	6	6	6	8
+ *
+ * Our mapping from ALSA channel to CEA686D speaker name and HDMI channel:
+ *
+ *				Number of ALSA channels
+ * ALSA Channel	2	3	4	5	6	7	8
+ * 0		FL:0	=	=	=	=	=	=
+ * 1		FR:1	=	=	=	=	=	=
+ * 2			FC:3	RL:4	LFE:2	=	=	=
+ * 3				RR:5	RL:4	FC:3	=	=
+ * 4					RR:5	RL:4	=	=
+ * 5						RR:5	=	=
+ * 6							RC:6	=
+ * 7							RLC/FRC	RLC/FRC
+ */
+static struct dw_hdmi_channel_conf default_hdmi_channel_config[7] = {
+	{ 0x03, 0x00 },	/* FL,FR */
+	{ 0x0b, 0x02 },	/* FL,FR,FC */
+	{ 0x33, 0x08 },	/* FL,FR,RL,RR */
+	{ 0x37, 0x09 },	/* FL,FR,LFE,RL,RR */
+	{ 0x3f, 0x0b },	/* FL,FR,LFE,FC,RL,RR */
+	{ 0x7f, 0x0f },	/* FL,FR,LFE,FC,RL,RR,RC */
+	{ 0xff, 0x13 },	/* FL,FR,LFE,FC,RL,RR,[FR]RC,[FR]LC */
+};
+
+struct snd_dw_hdmi {
+	struct snd_card *card;
+	struct snd_pcm *pcm;
+	spinlock_t lock;
+	struct dw_hdmi_audio_data data;
+	struct snd_pcm_substream *substream;
+	void (*reformat)(struct snd_dw_hdmi *, size_t, size_t);
+	void *buf_src;
+	void *buf_dst;
+	dma_addr_t buf_addr;
+	unsigned buf_offset;
+	unsigned buf_period;
+	unsigned buf_size;
+	unsigned channels;
+	u8 revision;
+	u8 iec_offset;
+	u8 cs[192][8];
+};
+
+static inline void baikal_hdmi_writeb(u8 val, void __iomem *base, uint offset)
+{
+	writeb(val, base + (offset << BAIKAL_HDMI_REG_SHIFT));
+}
+
+static inline void baikal_hdmi_writeb_relaxed(u8 val, void __iomem *base, uint offset)
+{
+	writeb_relaxed(val, base + (offset << BAIKAL_HDMI_REG_SHIFT));
+}
+
+static inline u8 baikal_hdmi_readb_relaxed(void __iomem *base, uint offset)
+{
+	return readb_relaxed(base + (offset << BAIKAL_HDMI_REG_SHIFT));
+}
+
+static void baikal_hdmi_writel(u32 val, void __iomem *base, uint offset)
+{
+	writeb_relaxed(val, base + (offset << BAIKAL_HDMI_REG_SHIFT));
+	writeb_relaxed(val >> 8, base + ((offset + 1) << BAIKAL_HDMI_REG_SHIFT));
+	writeb_relaxed(val >> 16, base + ((offset + 2) << BAIKAL_HDMI_REG_SHIFT));
+	writeb_relaxed(val >> 24, base + ((offset + 3) << BAIKAL_HDMI_REG_SHIFT));
+}
+
+/*
+ * Convert to hardware format: The userspace buffer contains IEC958 samples,
+ * with the PCUV bits in bits 31..28 and audio samples in bits 27..4.  We
+ * need these to be in bits 27..24, with the IEC B bit in bit 28, and audio
+ * samples in 23..0.
+ *
+ * Default preamble in bits 3..0: 8 = block start, 4 = even 2 = odd
+ *
+ * Ideally, we could do with having the data properly formatted in userspace.
+ */
+static void dw_hdmi_reformat_iec958(struct snd_dw_hdmi *dw,
+	size_t offset, size_t bytes)
+{
+	u32 *src = dw->buf_src + offset;
+	u32 *dst = dw->buf_dst + offset;
+	u32 *end = dw->buf_src + offset + bytes;
+
+	do {
+		u32 b, sample = *src++;
+
+		b = (sample & 8) << (28 - 3);
+
+		sample >>= 4;
+
+		*dst++ = sample | b;
+	} while (src < end);
+}
+
+static u32 parity(u32 sample)
+{
+	sample ^= sample >> 16;
+	sample ^= sample >> 8;
+	sample ^= sample >> 4;
+	sample ^= sample >> 2;
+	sample ^= sample >> 1;
+	return (sample & 1) << 27;
+}
+
+static void dw_hdmi_reformat_s24(struct snd_dw_hdmi *dw,
+	size_t offset, size_t bytes)
+{
+	u32 *src = dw->buf_src + offset;
+	u32 *dst = dw->buf_dst + offset;
+	u32 *end = dw->buf_src + offset + bytes;
+
+	do {
+		unsigned i;
+		u8 *cs;
+
+		cs = dw->cs[dw->iec_offset++];
+		if (dw->iec_offset >= 192)
+			dw->iec_offset = 0;
+
+		i = dw->channels;
+		do {
+			u32 sample = *src++;
+
+			sample &= ~0xff000000;
+			sample |= *cs++ << 24;
+			sample |= parity(sample & ~0xf8000000);
+
+			*dst++ = sample;
+		} while (--i);
+	} while (src < end);
+}
+
+static void dw_hdmi_create_cs(struct snd_dw_hdmi *dw,
+	struct snd_pcm_runtime *runtime)
+{
+	u8 cs[4];
+	unsigned ch, i, j;
+
+	snd_pcm_create_iec958_consumer(runtime, cs, sizeof(cs));
+
+	memset(dw->cs, 0, sizeof(dw->cs));
+
+	for (ch = 0; ch < 8; ch++) {
+		cs[2] &= ~IEC958_AES2_CON_CHANNEL;
+		cs[2] |= (ch + 1) << 4;
+
+		for (i = 0; i < ARRAY_SIZE(cs); i++) {
+			unsigned c = cs[i];
+
+			for (j = 0; j < 8; j++, c >>= 1)
+				dw->cs[i * 8 + j][ch] = (c & 1) << 2;
+		}
+	}
+	dw->cs[0][0] |= BIT(4);
+}
+
+static void dw_hdmi_start_dma(struct snd_dw_hdmi *dw)
+{
+	void __iomem *base = dw->data.base;
+	unsigned offset = dw->buf_offset;
+	unsigned period = dw->buf_period;
+	u32 start, stop;
+
+	dw->reformat(dw, offset, period);
+
+	/* Clear all irqs before enabling irqs and starting DMA */
+	baikal_hdmi_writeb_relaxed(HDMI_IH_AHBDMAAUD_STAT0_ALL,
+		       base, HDMI_IH_AHBDMAAUD_STAT0);
+
+	start = dw->buf_addr + offset;
+	stop = start + period - 1;
+
+	/* Setup the hardware start/stop addresses */
+	baikal_hdmi_writel(start, base, HDMI_AHB_DMA_STRADDR0);
+	baikal_hdmi_writel(stop, base, HDMI_AHB_DMA_STPADDR0);
+
+	baikal_hdmi_writeb_relaxed((u8)~HDMI_AHB_DMA_MASK_DONE, base, HDMI_AHB_DMA_MASK);
+	baikal_hdmi_writeb(HDMI_AHB_DMA_START_START, base, HDMI_AHB_DMA_START);
+
+	offset += period;
+	if (offset >= dw->buf_size)
+		offset = 0;
+	dw->buf_offset = offset;
+}
+
+static void dw_hdmi_stop_dma(struct snd_dw_hdmi *dw)
+{
+	/* Disable interrupts before disabling DMA */
+	baikal_hdmi_writeb_relaxed(~0, dw->data.base, HDMI_AHB_DMA_MASK);
+	baikal_hdmi_writeb_relaxed(HDMI_AHB_DMA_STOP_STOP, dw->data.base, HDMI_AHB_DMA_STOP);
+}
+
+static irqreturn_t snd_dw_hdmi_irq(int irq, void *data)
+{
+	struct snd_dw_hdmi *dw = data;
+	struct snd_pcm_substream *substream;
+	unsigned stat;
+
+	stat = baikal_hdmi_readb_relaxed(dw->data.base, HDMI_IH_AHBDMAAUD_STAT0);
+	if (!stat)
+		return IRQ_NONE;
+
+	baikal_hdmi_writeb_relaxed(stat, dw->data.base, HDMI_IH_AHBDMAAUD_STAT0);
+
+	substream = dw->substream;
+	if (stat & HDMI_IH_AHBDMAAUD_STAT0_DONE && substream) {
+		snd_pcm_period_elapsed(substream);
+
+		spin_lock(&dw->lock);
+		if (dw->substream)
+			dw_hdmi_start_dma(dw);
+		spin_unlock(&dw->lock);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static struct snd_pcm_hardware dw_hdmi_hw = {
+	.info = SNDRV_PCM_INFO_INTERLEAVED |
+		SNDRV_PCM_INFO_BLOCK_TRANSFER |
+		SNDRV_PCM_INFO_MMAP |
+		SNDRV_PCM_INFO_MMAP_VALID,
+	.formats = SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE |
+		   SNDRV_PCM_FMTBIT_S24_LE,
+	.rates = SNDRV_PCM_RATE_32000 |
+		 SNDRV_PCM_RATE_44100 |
+		 SNDRV_PCM_RATE_48000 |
+		 SNDRV_PCM_RATE_88200 |
+		 SNDRV_PCM_RATE_96000 |
+		 SNDRV_PCM_RATE_176400 |
+		 SNDRV_PCM_RATE_192000,
+	.channels_min = 2,
+	.channels_max = 8,
+	.buffer_bytes_max = 1024 * 1024,
+	.period_bytes_min = 256,
+	.period_bytes_max = 8192,	/* ERR004323: must limit to 8k */
+	.periods_min = 2,
+	.periods_max = 16,
+	.fifo_size = 0,
+};
+
+static int dw_hdmi_open(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct snd_dw_hdmi *dw = substream->private_data;
+	void __iomem *base = dw->data.base;
+	int ret;
+
+	runtime->hw = dw_hdmi_hw;
+
+	ret = snd_pcm_hw_constraint_eld(runtime, dw->data.eld);
+	if (ret < 0)
+		return ret;
+
+	ret = snd_pcm_limit_hw_rates(runtime);
+	if (ret < 0)
+		return ret;
+
+	ret = snd_pcm_hw_constraint_integer(runtime,
+					    SNDRV_PCM_HW_PARAM_PERIODS);
+	if (ret < 0)
+		return ret;
+
+	/* Limit the buffer size to the size of the preallocated buffer */
+	ret = snd_pcm_hw_constraint_minmax(runtime,
+					   SNDRV_PCM_HW_PARAM_BUFFER_SIZE,
+					   0, substream->dma_buffer.bytes);
+	if (ret < 0)
+		return ret;
+
+	/* Clear FIFO */
+	baikal_hdmi_writeb_relaxed(HDMI_AHB_DMA_CONF0_SW_FIFO_RST,
+		       base, HDMI_AHB_DMA_CONF0);
+
+	/* Configure interrupt polarities */
+	baikal_hdmi_writeb_relaxed(~0, base, HDMI_AHB_DMA_POL);
+	baikal_hdmi_writeb_relaxed(~0, base, HDMI_AHB_DMA_BUFFPOL);
+
+	/* Keep interrupts masked, and clear any pending */
+	baikal_hdmi_writeb_relaxed(~0, base, HDMI_AHB_DMA_MASK);
+	baikal_hdmi_writeb_relaxed(~0, base, HDMI_IH_AHBDMAAUD_STAT0);
+
+	ret = request_irq(dw->data.irq, snd_dw_hdmi_irq, IRQF_SHARED,
+			  "dw-hdmi-audio", dw);
+	if (ret)
+		return ret;
+
+	/* Un-mute done interrupt */
+	baikal_hdmi_writeb_relaxed(HDMI_IH_MUTE_AHBDMAAUD_STAT0_ALL &
+		       ~HDMI_IH_MUTE_AHBDMAAUD_STAT0_DONE,
+		       base, HDMI_IH_MUTE_AHBDMAAUD_STAT0);
+
+	return 0;
+}
+
+static int dw_hdmi_close(struct snd_pcm_substream *substream)
+{
+	struct snd_dw_hdmi *dw = substream->private_data;
+
+	/* Mute all interrupts */
+	baikal_hdmi_writeb_relaxed(HDMI_IH_MUTE_AHBDMAAUD_STAT0_ALL,
+		       dw->data.base, HDMI_IH_MUTE_AHBDMAAUD_STAT0);
+
+	free_irq(dw->data.irq, dw);
+
+	return 0;
+}
+
+static int dw_hdmi_hw_free(struct snd_pcm_substream *substream)
+{
+	return snd_pcm_lib_free_vmalloc_buffer(substream);
+}
+
+static int dw_hdmi_hw_params(struct snd_pcm_substream *substream,
+	struct snd_pcm_hw_params *params)
+{
+	/* Allocate the PCM runtime buffer, which is exposed to userspace. */
+	return snd_pcm_lib_alloc_vmalloc_buffer(substream,
+						params_buffer_bytes(params));
+}
+
+static int dw_hdmi_prepare(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct snd_dw_hdmi *dw = substream->private_data;
+	u8 threshold, conf0, conf1, layout, ca;
+
+	/* Setup as per 3.0.5 FSL 4.1.0 BSP */
+	switch (dw->revision) {
+	case 0x0a:
+		conf0 = HDMI_AHB_DMA_CONF0_BURST_MODE |
+			HDMI_AHB_DMA_CONF0_INCR4;
+		if (runtime->channels == 2)
+			threshold = 126;
+		else
+			threshold = 124;
+		break;
+	case 0x1a:
+		conf0 = HDMI_AHB_DMA_CONF0_BURST_MODE |
+			HDMI_AHB_DMA_CONF0_INCR8;
+		threshold = 128;
+		break;
+	case 0x2a: /* this revision is used in Baikal-M SoC */
+		conf0 = HDMI_AHB_DMA_CONF0_BURST_MODE |
+			HDMI_AHB_DMA_CONF0_INCR16;
+		threshold = 128;
+		break;
+	default:
+		/* NOTREACHED */
+		return -EINVAL;
+	}
+
+	dw_hdmi_set_sample_rate(dw->data.hdmi, runtime->rate);
+
+	/* Minimum number of bytes in the fifo. */
+	runtime->hw.fifo_size = threshold * 32;
+
+	conf0 |= HDMI_AHB_DMA_CONF0_EN_HLOCK;
+	conf1 = default_hdmi_channel_config[runtime->channels - 2].conf1;
+	ca = default_hdmi_channel_config[runtime->channels - 2].ca;
+
+	/*
+	 * For >2 channel PCM audio, we need to select layout 1
+	 * and set an appropriate channel map.
+	 */
+	if (runtime->channels > 2)
+		layout = HDMI_FC_AUDSCONF_LAYOUT1;
+	else
+		layout = HDMI_FC_AUDSCONF_LAYOUT0;
+
+	baikal_hdmi_writeb_relaxed(threshold, dw->data.base, HDMI_AHB_DMA_THRSLD);
+	baikal_hdmi_writeb_relaxed(conf0, dw->data.base, HDMI_AHB_DMA_CONF0);
+	baikal_hdmi_writeb_relaxed(conf1, dw->data.base, HDMI_AHB_DMA_CONF1);
+	baikal_hdmi_writeb_relaxed(layout, dw->data.base, HDMI_FC_AUDSCONF);
+	baikal_hdmi_writeb_relaxed(ca, dw->data.base, HDMI_FC_AUDICONF2);
+
+	switch (runtime->format) {
+	case SNDRV_PCM_FORMAT_IEC958_SUBFRAME_LE:
+		dw->reformat = dw_hdmi_reformat_iec958;
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		dw_hdmi_create_cs(dw, runtime);
+		dw->reformat = dw_hdmi_reformat_s24;
+		break;
+	}
+	dw->iec_offset = 0;
+	dw->channels = runtime->channels;
+	dw->buf_src  = runtime->dma_area;
+	dw->buf_dst  = substream->dma_buffer.area;
+	dw->buf_addr = substream->dma_buffer.addr;
+	dw->buf_period = snd_pcm_lib_period_bytes(substream);
+	dw->buf_size = snd_pcm_lib_buffer_bytes(substream);
+
+	return 0;
+}
+
+static int dw_hdmi_trigger(struct snd_pcm_substream *substream, int cmd)
+{
+	struct snd_dw_hdmi *dw = substream->private_data;
+	unsigned long flags;
+	int ret = 0;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+		spin_lock_irqsave(&dw->lock, flags);
+		dw->buf_offset = 0;
+		dw->substream = substream;
+		dw_hdmi_start_dma(dw);
+		dw_hdmi_audio_enable(dw->data.hdmi);
+		spin_unlock_irqrestore(&dw->lock, flags);
+		substream->runtime->delay = substream->runtime->period_size;
+		break;
+
+	case SNDRV_PCM_TRIGGER_STOP:
+		spin_lock_irqsave(&dw->lock, flags);
+		dw->substream = NULL;
+		dw_hdmi_stop_dma(dw);
+		dw_hdmi_audio_disable(dw->data.hdmi);
+		spin_unlock_irqrestore(&dw->lock, flags);
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static snd_pcm_uframes_t dw_hdmi_pointer(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct snd_dw_hdmi *dw = substream->private_data;
+
+	/*
+	 * We are unable to report the exact hardware position as
+	 * reading the 32-bit DMA position using 8-bit reads is racy.
+	 */
+	return bytes_to_frames(runtime, dw->buf_offset);
+}
+
+static struct snd_pcm_ops snd_dw_hdmi_ops = {
+	.open = dw_hdmi_open,
+	.close = dw_hdmi_close,
+	.ioctl = snd_pcm_lib_ioctl,
+	.hw_params = dw_hdmi_hw_params,
+	.hw_free = dw_hdmi_hw_free,
+	.prepare = dw_hdmi_prepare,
+	.trigger = dw_hdmi_trigger,
+	.pointer = dw_hdmi_pointer,
+	.page = snd_pcm_lib_get_vmalloc_page,
+};
+
+static int snd_dw_hdmi_probe(struct platform_device *pdev)
+{
+	const struct dw_hdmi_audio_data *data = pdev->dev.platform_data;
+	struct device *dev = pdev->dev.parent;
+	struct snd_dw_hdmi *dw;
+	struct snd_card *card;
+	struct snd_pcm *pcm;
+	unsigned revision;
+	int ret;
+
+	baikal_hdmi_writeb_relaxed(HDMI_IH_MUTE_AHBDMAAUD_STAT0_ALL,
+		       data->base, HDMI_IH_MUTE_AHBDMAAUD_STAT0);
+	revision = baikal_hdmi_readb_relaxed(data->base, HDMI_REVISION_ID);
+	if (revision != 0x0a && revision != 0x1a && revision != 0x2a) {
+		dev_err(dev, "dw-hdmi-audio: unknown revision 0x%02x\n",
+			revision);
+		return -ENXIO;
+	}
+
+	ret = snd_card_new(dev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1,
+			      THIS_MODULE, sizeof(struct snd_dw_hdmi), &card);
+	if (ret < 0)
+		return ret;
+
+	strlcpy(card->driver, DRIVER_NAME, sizeof(card->driver));
+	strlcpy(card->shortname, "DW-HDMI", sizeof(card->shortname));
+	snprintf(card->longname, sizeof(card->longname),
+		 "%s rev 0x%02x, irq %d", card->shortname, revision,
+		 data->irq);
+
+	dw = card->private_data;
+	dw->card = card;
+	dw->data = *data;
+	dw->revision = revision;
+
+	spin_lock_init(&dw->lock);
+
+	ret = snd_pcm_new(card, "DW HDMI", 0, 1, 0, &pcm);
+	if (ret < 0)
+		goto err;
+
+	dw->pcm = pcm;
+	pcm->private_data = dw;
+	strlcpy(pcm->name, DRIVER_NAME, sizeof(pcm->name));
+	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &snd_dw_hdmi_ops);
+
+	/*
+	 * To support 8-channel 96kHz audio reliably, we need 512k
+	 * to satisfy alsa with our restricted period (ERR004323).
+	 */
+	snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
+			dev, 128 * 1024, 1024 * 1024);
+
+	ret = snd_card_register(card);
+	if (ret < 0)
+		goto err;
+
+	platform_set_drvdata(pdev, dw);
+
+	return 0;
+
+err:
+	snd_card_free(card);
+	return ret;
+}
+
+static int snd_dw_hdmi_remove(struct platform_device *pdev)
+{
+	struct snd_dw_hdmi *dw = platform_get_drvdata(pdev);
+
+	snd_card_free(dw->card);
+
+	return 0;
+}
+
+#if defined(CONFIG_PM_SLEEP) && defined(IS_NOT_BROKEN)
+/*
+ * This code is fine, but requires implementation in the dw_hdmi_trigger()
+ * method which is currently missing as I have no way to test this.
+ */
+static int snd_dw_hdmi_suspend(struct device *dev)
+{
+	struct snd_dw_hdmi *dw = dev_get_drvdata(dev);
+
+	snd_power_change_state(dw->card, SNDRV_CTL_POWER_D3cold);
+	snd_pcm_suspend_all(dw->pcm);
+
+	return 0;
+}
+
+static int snd_dw_hdmi_resume(struct device *dev)
+{
+	struct snd_dw_hdmi *dw = dev_get_drvdata(dev);
+
+	snd_power_change_state(dw->card, SNDRV_CTL_POWER_D0);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(snd_dw_hdmi_pm, snd_dw_hdmi_suspend,
+			 snd_dw_hdmi_resume);
+#define PM_OPS &snd_dw_hdmi_pm
+#else
+#define PM_OPS NULL
+#endif
+
+static struct platform_driver snd_dw_hdmi_driver = {
+	.probe	= snd_dw_hdmi_probe,
+	.remove	= snd_dw_hdmi_remove,
+	.driver	= {
+		.name = DRIVER_NAME,
+		.pm = PM_OPS,
+	},
+};
+
+module_platform_driver(snd_dw_hdmi_driver);
+
+MODULE_AUTHOR("Russell King <rmk+kernel@arm.linux.org.uk>");
+MODULE_DESCRIPTION("Synopsis Designware HDMI AHB ALSA interface");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:" DRIVER_NAME);
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c
index 2b7539701b422..0bf907eb3ccb8 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c
@@ -416,6 +416,11 @@ static int dw_hdmi_prepare(struct snd_pcm_substream *substream)
 			HDMI_AHB_DMA_CONF0_INCR8;
 		threshold = 128;
 		break;
+	case 0x2a: /* this revision is used in Baikal-M SoC */
+		conf0 = HDMI_AHB_DMA_CONF0_BURST_MODE |
+			HDMI_AHB_DMA_CONF0_INCR16;
+		threshold = 128;
+		break;
 	default:
 		/* NOTREACHED */
 		return -EINVAL;
@@ -527,7 +532,7 @@ static int snd_dw_hdmi_probe(struct platform_device *pdev)
 	writeb_relaxed(HDMI_IH_MUTE_AHBDMAAUD_STAT0_ALL,
 		       data->base + HDMI_IH_MUTE_AHBDMAAUD_STAT0);
 	revision = readb_relaxed(data->base + HDMI_REVISION_ID);
-	if (revision != 0x0a && revision != 0x1a) {
+	if (revision != 0x0a && revision != 0x1a && revision != 0x2a) {
 		dev_err(dev, "dw-hdmi-audio: unknown revision 0x%02x\n",
 			revision);
 		return -ENXIO;
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ext.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ext.h
new file mode 100644
index 0000000000000..89dce9e5b04c9
--- /dev/null
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ext.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Additional header file used with
+ * DesignWare High-Definition Multimedia Interface (HDMI) driver
+ * for Baikal Electronics BE-M1000 SoC
+ *
+ * This file contains constants which are missing in 4.9.x dw-hdmi.h
+ * and dw-hdmi-audio.h files. These constants are needed for
+ * Baikal Electronics' version of dw-hdmi driver as it is essentially
+ * a backported 4.13.x driver with some patches.
+ *
+ * Copyright (C) 2019 Baikal Electronics, JSC
+ *
+ * Author: Pavel Parkhomenko <Pavel.Parkhomenko@baikalelectronics.ru>
+ *
+ * Parts of this file were based on sources as follows:
+ *
+ * Copyright (C) 2011 Freescale Semiconductor, Inc.
+ */
+
+#ifndef __DW_HDMI_EXT_H__
+#define __DW_HDMI_EXT_H__
+
+enum {
+/* PRODUCT_ID0 field values */
+	HDMI_PRODUCT_ID0_HDMI_TX = 0xa0,
+
+/* PRODUCT_ID1 field values */
+	HDMI_PRODUCT_ID1_HDCP = 0xc0,
+	HDMI_PRODUCT_ID1_HDMI_RX = 0x02,
+	HDMI_PRODUCT_ID1_HDMI_TX = 0x01,
+
+/* CONFIG0_ID field values */
+	HDMI_CONFIG0_I2S = 0x10,
+
+/* CONFIG3_ID field values */
+	HDMI_CONFIG3_AHBAUDDMA = 0x02,
+	HDMI_CONFIG3_GPAUD = 0x01,
+
+/* IH_I2CM_STAT0 and IH_MUTE_I2CM_STAT0 field values */
+	HDMI_IH_I2CM_STAT0_DONE = 0x2,
+	HDMI_IH_I2CM_STAT0_ERROR = 0x1,
+
+/* FC_DATAUTO0 field values */
+	HDMI_FC_DATAUTO0_VSD_MASK = 0x08,
+	HDMI_FC_DATAUTO0_VSD_OFFSET = 3,
+
+/* AUD_CONF0 field values */
+	HDMI_AUD_CONF0_SW_RESET = 0x80,
+	HDMI_AUD_CONF0_I2S_ALL_ENABLE = 0x2F,
+
+/* AUD_CONF1 field values */
+	HDMI_AUD_CONF1_MODE_I2S = 0x00,
+	HDMI_AUD_CONF1_MODE_RIGHT_J = 0x02,
+	HDMI_AUD_CONF1_MODE_LEFT_J = 0x04,
+	HDMI_AUD_CONF1_WIDTH_16 = 0x10,
+	HDMI_AUD_CONF1_WIDTH_24 = 0x18,
+
+/* HDMI_AUD_INPUTCLKFS field values */
+	HDMI_AUD_INPUTCLKFS_128FS = 0,
+	HDMI_AUD_INPUTCLKFS_256FS = 1,
+	HDMI_AUD_INPUTCLKFS_512FS = 2,
+	HDMI_AUD_INPUTCLKFS_64FS = 4,
+
+
+/* I2CM_OPERATION field values */
+	HDMI_I2CM_OPERATION_WRITE = 0x10,
+	HDMI_I2CM_OPERATION_READ_EXT = 0x2,
+	HDMI_I2CM_OPERATION_READ = 0x1,
+
+/* I2CM_INT field values */
+	HDMI_I2CM_INT_DONE_POL = 0x8,
+	HDMI_I2CM_INT_DONE_MASK = 0x4,
+
+/* I2CM_CTLINT field values */
+	HDMI_I2CM_CTLINT_NAC_POL = 0x80,
+	HDMI_I2CM_CTLINT_NAC_MASK = 0x40,
+	HDMI_I2CM_CTLINT_ARB_POL = 0x8,
+	HDMI_I2CM_CTLINT_ARB_MASK = 0x4,
+};
+
+/*
+ * HDMI 3D TX PHY registers
+ */
+#define HDMI_3D_TX_PHY_PWRCTRL			0x00
+#define HDMI_3D_TX_PHY_SERDIVCTRL		0x01
+#define HDMI_3D_TX_PHY_SERCKCTRL		0x02
+#define HDMI_3D_TX_PHY_SERCKKILLCTRL		0x03
+#define HDMI_3D_TX_PHY_TXRESCTRL		0x04
+#define HDMI_3D_TX_PHY_CKCALCTRL		0x05
+#define HDMI_3D_TX_PHY_CPCE_CTRL		0x06
+#define HDMI_3D_TX_PHY_TXCLKMEASCTRL		0x07
+#define HDMI_3D_TX_PHY_TXMEASCTRL		0x08
+#define HDMI_3D_TX_PHY_CKSYMTXCTRL		0x09
+#define HDMI_3D_TX_PHY_CMPSEQCTRL		0x0a
+#define HDMI_3D_TX_PHY_CMPPWRCTRL		0x0b
+#define HDMI_3D_TX_PHY_CMPMODECTRL		0x0c
+#define HDMI_3D_TX_PHY_MEASCTRL			0x0d
+#define HDMI_3D_TX_PHY_VLEVCTRL			0x0e
+#define HDMI_3D_TX_PHY_D2ACTRL			0x0f
+#define HDMI_3D_TX_PHY_CURRCTRL			0x10
+#define HDMI_3D_TX_PHY_DRVANACTRL		0x11
+#define HDMI_3D_TX_PHY_PLLMEASCTRL		0x12
+#define HDMI_3D_TX_PHY_PLLPHBYCTRL		0x13
+#define HDMI_3D_TX_PHY_GRP_CTRL			0x14
+#define HDMI_3D_TX_PHY_GMPCTRL			0x15
+#define HDMI_3D_TX_PHY_MPLLMEASCTRL		0x16
+#define HDMI_3D_TX_PHY_MSM_CTRL			0x17
+#define HDMI_3D_TX_PHY_SCRPB_STATUS		0x18
+#define HDMI_3D_TX_PHY_TXTERM			0x19
+#define HDMI_3D_TX_PHY_PTRPT_ENBL		0x1a
+#define HDMI_3D_TX_PHY_PATTERNGEN		0x1b
+#define HDMI_3D_TX_PHY_SDCAP_MODE		0x1c
+#define HDMI_3D_TX_PHY_SCOPEMODE		0x1d
+#define HDMI_3D_TX_PHY_DIGTXMODE		0x1e
+#define HDMI_3D_TX_PHY_STR_STATUS		0x1f
+#define HDMI_3D_TX_PHY_SCOPECNT0		0x20
+#define HDMI_3D_TX_PHY_SCOPECNT1		0x21
+#define HDMI_3D_TX_PHY_SCOPECNT2		0x22
+#define HDMI_3D_TX_PHY_SCOPECNTCLK		0x23
+#define HDMI_3D_TX_PHY_SCOPESAMPLE		0x24
+#define HDMI_3D_TX_PHY_SCOPECNTMSB01		0x25
+#define HDMI_3D_TX_PHY_SCOPECNTMSB2CK		0x26
+
+/* HDMI_3D_TX_PHY_CKCALCTRL values */
+#define HDMI_3D_TX_PHY_CKCALCTRL_OVERRIDE		BIT(15)
+
+/* HDMI_3D_TX_PHY_MSM_CTRL values */
+#define HDMI_3D_TX_PHY_MSM_CTRL_MPLL_PH_SEL_CK		BIT(13)
+#define HDMI_3D_TX_PHY_MSM_CTRL_CKO_SEL_CLK_REF_MPLL	(0 << 1)
+#define HDMI_3D_TX_PHY_MSM_CTRL_CKO_SEL_OFF		(1 << 1)
+#define HDMI_3D_TX_PHY_MSM_CTRL_CKO_SEL_PCLK		(2 << 1)
+#define HDMI_3D_TX_PHY_MSM_CTRL_CKO_SEL_FB_CLK		(3 << 1)
+#define HDMI_3D_TX_PHY_MSM_CTRL_SCOPE_CK_SEL		BIT(0)
+
+/* HDMI_3D_TX_PHY_PTRPT_ENBL values */
+#define HDMI_3D_TX_PHY_PTRPT_ENBL_OVERRIDE		BIT(15)
+#define HDMI_3D_TX_PHY_PTRPT_ENBL_PG_SKIP_BIT2		BIT(8)
+#define HDMI_3D_TX_PHY_PTRPT_ENBL_PG_SKIP_BIT1		BIT(7)
+#define HDMI_3D_TX_PHY_PTRPT_ENBL_PG_SKIP_BIT0		BIT(6)
+#define HDMI_3D_TX_PHY_PTRPT_ENBL_CK_REF_ENB		BIT(5)
+#define HDMI_3D_TX_PHY_PTRPT_ENBL_RCAL_ENB		BIT(4)
+#define HDMI_3D_TX_PHY_PTRPT_ENBL_TX_CLK_ALIGN_ENB	BIT(3)
+#define HDMI_3D_TX_PHY_PTRPT_ENBL_TX_READY		BIT(2)
+#define HDMI_3D_TX_PHY_PTRPT_ENBL_CKO_WORD_ENB		BIT(1)
+#define HDMI_3D_TX_PHY_PTRPT_ENBL_REFCLK_ENB		BIT(0)
+
+struct dw_hdmi_i2s_audio_data {
+	struct dw_hdmi *hdmi;
+
+	void (*write)(struct dw_hdmi *hdmi, u8 val, int offset);
+	u8 (*read)(struct dw_hdmi *hdmi, int offset);
+};
+
+#endif /* __DW_HDMI_EXT_H__ */
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index 0d39a201c7591..72885a2492edc 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -62,7 +62,7 @@ int panfrost_gpu_soft_reset(struct panfrost_device *pfdev)
 	gpu_write(pfdev, GPU_CMD, GPU_CMD_SOFT_RESET);
 
 	ret = readl_relaxed_poll_timeout(pfdev->iomem + GPU_INT_RAWSTAT,
-		val, val & GPU_IRQ_RESET_COMPLETED, 100, 10000);
+		val, val & GPU_IRQ_RESET_COMPLETED, 5, 10000);
 
 	if (ret) {
 		dev_err(pfdev->dev, "gpu soft reset timed out\n");
@@ -313,26 +313,34 @@ static void panfrost_gpu_init_features(struct panfrost_device *pfdev)
 void panfrost_gpu_power_on(struct panfrost_device *pfdev)
 {
 	int ret;
-	u32 val;
+	ktime_t timeout;
 
 	panfrost_gpu_init_quirks(pfdev);
 
 	/* Just turn on everything for now */
 	gpu_write(pfdev, L2_PWRON_LO, pfdev->features.l2_present);
-	ret = readl_relaxed_poll_timeout(pfdev->iomem + L2_READY_LO,
-		val, val == pfdev->features.l2_present, 100, 1000);
-
 	gpu_write(pfdev, STACK_PWRON_LO, pfdev->features.stack_present);
-	ret |= readl_relaxed_poll_timeout(pfdev->iomem + STACK_READY_LO,
-		val, val == pfdev->features.stack_present, 100, 1000);
-
 	gpu_write(pfdev, SHADER_PWRON_LO, pfdev->features.shader_present);
-	ret |= readl_relaxed_poll_timeout(pfdev->iomem + SHADER_READY_LO,
-		val, val == pfdev->features.shader_present, 100, 1000);
-
 	gpu_write(pfdev, TILER_PWRON_LO, pfdev->features.tiler_present);
-	ret |= readl_relaxed_poll_timeout(pfdev->iomem + TILER_READY_LO,
-		val, val == pfdev->features.tiler_present, 100, 1000);
+
+	timeout = ktime_add_us(ktime_get(), 1000);
+	ret = 0;
+	for (;;) {
+	if (gpu_read(pfdev, L2_READY_LO) ==
+			pfdev->features.l2_present &&
+		    gpu_read(pfdev, STACK_READY_LO) ==
+			pfdev->features.stack_present &&
+		    gpu_read(pfdev, SHADER_READY_LO) ==
+			pfdev->features.shader_present &&
+		    gpu_read(pfdev, TILER_READY_LO) ==
+			pfdev->features.tiler_present)
+			break;
+		if (ktime_compare(ktime_get(), timeout) > 0) {
+			ret = 1;
+			break;
+		}
+		usleep_range(3, 5);
+	}
 
 	if (ret)
 		dev_err(pfdev->dev, "error powering up gpu");
@@ -340,10 +348,11 @@ void panfrost_gpu_power_on(struct panfrost_device *pfdev)
 
 void panfrost_gpu_power_off(struct panfrost_device *pfdev)
 {
-	gpu_write(pfdev, TILER_PWROFF_LO, 0);
-	gpu_write(pfdev, SHADER_PWROFF_LO, 0);
-	gpu_write(pfdev, STACK_PWROFF_LO, 0);
-	gpu_write(pfdev, L2_PWROFF_LO, 0);
+	dev_dbg(pfdev->dev, "gpu_power_off...\n");
+	gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present);
+	gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present);
+	gpu_write(pfdev, STACK_PWROFF_LO, pfdev->features.stack_present);
+	gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present);
 }
 
 int panfrost_gpu_init(struct panfrost_device *pfdev)
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index 9f770d4546848..ac3ba444cb677 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -133,6 +133,8 @@ static void panfrost_job_write_affinity(struct panfrost_device *pfdev,
 	 * multiple (2) coherent core groups
 	 */
 	affinity = pfdev->features.shader_present;
+	if (panfrost_model_eq(pfdev, 0x620) && js == 1)
+		affinity &= 0xf;
 
 	job_write(pfdev, JS_AFFINITY_NEXT_LO(js), affinity & 0xFFFFFFFF);
 	job_write(pfdev, JS_AFFINITY_NEXT_HI(js), affinity >> 32);
@@ -181,7 +183,7 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
 		job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id);
 
 	/* GO ! */
-	dev_dbg(pfdev->dev, "JS: Submitting atom %p to js[%d] with head=0x%llx",
+	dev_dbg(pfdev->dev, "JS: Submitting atom %px to js[%d] with head=0x%llx",
 				job, js, jc_head);
 
 	job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
@@ -389,7 +391,7 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job)
 	if (dma_fence_is_signaled(job->done_fence))
 		return;
 
-	dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
+	dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%px",
 		js,
 		job_read(pfdev, JS_CONFIG(js)),
 		job_read(pfdev, JS_STATUS(js)),
@@ -403,7 +405,7 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job)
 	for (i = 0; i < NUM_JOB_SLOTS; i++) {
 		struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;
 
-		drm_sched_stop(sched, sched_job);
+		drm_sched_stop(sched, pfdev->jobs[i] ? &pfdev->jobs[i]->base : NULL);
 		if (js != i)
 			/* Ensure any timeouts on other slots have finished */
 			cancel_delayed_work_sync(&sched->work_tdr);
@@ -414,6 +416,7 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job)
 	spin_lock_irqsave(&pfdev->js->job_lock, flags);
 	for (i = 0; i < NUM_JOB_SLOTS; i++) {
 		if (pfdev->jobs[i]) {
+			panfrost_devfreq_record_transition(pfdev, js);
 			pm_runtime_put_noidle(pfdev->dev);
 			pfdev->jobs[i] = NULL;
 		}
@@ -422,7 +425,6 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job)
 
 	/* panfrost_core_dump(pfdev); */
 
-	panfrost_devfreq_record_transition(pfdev, js);
 	panfrost_device_reset(pfdev);
 
 	for (i = 0; i < NUM_JOB_SLOTS; i++)
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index b17f3022db5a4..f4de1244722ed 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -31,7 +31,7 @@ static int wait_ready(struct panfrost_device *pfdev, u32 as_nr)
 	/* Wait for the MMU status to indicate there is no active command, in
 	 * case one is pending. */
 	ret = readl_relaxed_poll_timeout_atomic(pfdev->iomem + AS_STATUS(as_nr),
-		val, !(val & AS_STATUS_AS_ACTIVE), 10, 1000);
+		val, !(val & AS_STATUS_AS_ACTIVE), 3, 1000);
 
 	if (ret)
 		dev_err(pfdev->dev, "AS_ACTIVE bit stuck\n");
@@ -56,12 +56,29 @@ static void lock_region(struct panfrost_device *pfdev, u32 as_nr,
 {
 	u8 region_width;
 	u64 region = iova & PAGE_MASK;
-
-	/* The size is encoded as ceil(log2) minus(1), which may be calculated
-	 * with fls. The size must be clamped to hardware bounds.
+	/*
+	 * fls returns:
+	 * 1 .. 32
+	 *
+	 * 10 + fls(num_pages)
+	 * results in the range (11 .. 42)
 	 */
-	size = max_t(u64, size, AS_LOCK_REGION_MIN_SIZE);
-	region_width = fls64(size - 1) - 1;
+
+	if (size & ~PAGE_MASK)
+		size = (size >> PAGE_SHIFT) + 1;
+	else
+		size = size >> PAGE_SHIFT;
+	region_width = 10;
+	if (size > 0x80000000) {
+		if (size & 0xffffffff)
+			size = (size >> 32) + 1;
+		else
+			size = size >> 32;
+		region_width += 32;
+	}
+	region_width += fls(size);
+	if (size != (1ul << ((region_width - 11) & 0x1f)))
+		region_width++;
 	region |= region_width;
 
 	/* Lock the region that needs to be updated */
@@ -106,7 +123,7 @@ static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_m
 	u64 transtab = cfg->arm_mali_lpae_cfg.transtab;
 	u64 memattr = cfg->arm_mali_lpae_cfg.memattr;
 
-	mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
+	mmu_hw_do_operation_locked(pfdev, as_nr, 0, 1ULL << 48, AS_COMMAND_FLUSH_MEM);
 
 	mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), transtab & 0xffffffffUL);
 	mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), transtab >> 32);
@@ -114,6 +131,10 @@ static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_m
 	/* Need to revisit mem attrs.
 	 * NC is the default, Mali driver is inner WT.
 	 */
+	if (pfdev->features.id == 0x620) {
+		memattr &= ~0xf0f0f0ULL;
+		memattr |= 0x404040;
+	}
 	mmu_write(pfdev, AS_MEMATTR_LO(as_nr), memattr & 0xffffffffUL);
 	mmu_write(pfdev, AS_MEMATTR_HI(as_nr), memattr >> 32);
 
@@ -177,7 +198,7 @@ u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu)
 	atomic_set(&mmu->as_count, 1);
 	list_add(&mmu->list, &pfdev->as_lru_list);
 
-	dev_dbg(pfdev->dev, "Assigned AS%d to mmu %p, alloc_mask=%lx", as, mmu, pfdev->as_alloc_mask);
+	dev_dbg(pfdev->dev, "Assigned AS%d to mmu %px, alloc_mask=%lx", as, mmu, pfdev->as_alloc_mask);
 
 	panfrost_mmu_enable(pfdev, mmu);
 
@@ -278,6 +299,8 @@ int panfrost_mmu_map(struct panfrost_gem_mapping *mapping)
 
 	if (bo->noexec)
 		prot |= IOMMU_NOEXEC;
+	if (bo->is_heap)
+		prot |= IOMMU_CACHE;
 
 	sgt = drm_gem_shmem_get_pages_sgt(obj);
 	if (WARN_ON(IS_ERR(sgt)))
diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h
index 2ae3a4d301d39..eddaa62ad8b0e 100644
--- a/drivers/gpu/drm/panfrost/panfrost_regs.h
+++ b/drivers/gpu/drm/panfrost/panfrost_regs.h
@@ -318,8 +318,6 @@
 #define AS_FAULTSTATUS_ACCESS_TYPE_READ		(0x2 << 8)
 #define AS_FAULTSTATUS_ACCESS_TYPE_WRITE	(0x3 << 8)
 
-#define AS_LOCK_REGION_MIN_SIZE                 (1ULL << 15)
-
 #define gpu_write(dev, reg, data) writel(data, dev->iomem + reg)
 #define gpu_read(dev, reg) readl(dev->iomem + reg)
 
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 4ea742ada36de..b295b8a9d92b0 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -385,6 +385,25 @@ config SENSORS_ATXP1
 	  This driver can also be built as a module. If so, the module
 	  will be called atxp1.
 
+config SENSORS_PVT
+	tristate "Baikal PVT"
+	help
+	  If you say yes here you get support for Baikal PVT single
+	  input temperature sensor chips.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called pvt.
+
+config SENSORS_PVT_I2C
+	tristate "Baikal PVT I2C"
+	depends on I2C && I2C_SLAVE
+	help
+	  If you say yes here you get support for Baikal PVT single input
+	  temperature sensor chips data for Baikal-S SoC through I2C.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called pvt-i2c.
+
 config SENSORS_DS620
 	tristate "Dallas Semiconductor DS620"
 	depends on I2C
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 40c036ea45e6b..7b84d6958f5f7 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -160,6 +160,8 @@ obj-$(CONFIG_SENSORS_SMSC47M192)+= smsc47m192.o
 obj-$(CONFIG_SENSORS_STTS751)	+= stts751.o
 obj-$(CONFIG_SENSORS_AMC6821)	+= amc6821.o
 obj-$(CONFIG_SENSORS_TC74)	+= tc74.o
+obj-$(CONFIG_SENSORS_PVT)	+= pvt.o
+obj-$(CONFIG_SENSORS_PVT_I2C)	+= pvt-i2c.o
 obj-$(CONFIG_SENSORS_THMC50)	+= thmc50.o
 obj-$(CONFIG_SENSORS_TMP102)	+= tmp102.o
 obj-$(CONFIG_SENSORS_TMP103)	+= tmp103.o
diff --git a/drivers/hwmon/pvt-i2c.c b/drivers/hwmon/pvt-i2c.c
new file mode 100644
index 0000000000000..d0c9c8785ce93
--- /dev/null
+++ b/drivers/hwmon/pvt-i2c.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * I2C driver for Baikal-S SoC PVT data
+ *
+ * Copyright (C) 2022-2023 Baikal Electronics, JSC
+ * Author: Aleksandr Efimov <alexander.efimov@baikalelectronics.ru>
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/errno.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+
+#define BAIKAL_SMC_PVT_CMD	0x82000001
+#define PVT_READ		0
+
+#define CA75_0_PVT_BASE		0x4030000
+#define CA75_1_PVT_BASE		0x8030000
+#define CA75_2_PVT_BASE		0xc030000
+#define CA75_3_PVT_BASE		0x10030000
+#define CA75_4_PVT_BASE		0x14030000
+#define CA75_5_PVT_BASE		0x18030000
+#define CA75_6_PVT_BASE		0x1c030000
+#define CA75_7_PVT_BASE		0x20030000
+#define CA75_8_PVT_BASE		0x24030000
+#define CA75_9_PVT_BASE		0x28030000
+#define CA75_10_PVT_BASE	0x2c030000
+#define CA75_11_PVT_BASE	0x30030000
+#define PCIE0_PVT_BASE		0x38030000
+#define PCIE1_PVT_BASE		0x3c030000
+#define PCIE2_PVT_BASE		0x44030000
+#define PCIE3_PVT_BASE		0x48030000
+#define PCIE4_PVT_BASE		0x4c030000
+#define DDR0_PVT_BASE		0x50030000
+#define DDR1_PVT_BASE		0x54030000
+#define DDR2_PVT_BASE		0x58030000
+#define DDR3_PVT_BASE		0x60030000
+#define DDR4_PVT_BASE		0x64030000
+#define DDR5_PVT_BASE		0x68030000
+
+static u32 pvt_i2c_readl(u32 pvt_base, u32 offset)
+{
+	struct arm_smccc_res res;
+
+	arm_smccc_smc(BAIKAL_SMC_PVT_CMD, PVT_READ, pvt_base, offset,
+		      0, 0, 0, 0, &res);
+
+	return res.a0;
+}
+
+static const u32 pvt_i2c_addr[] = {
+	CA75_0_PVT_BASE,
+	CA75_1_PVT_BASE,
+	CA75_2_PVT_BASE,
+	CA75_3_PVT_BASE,
+	CA75_4_PVT_BASE,
+	CA75_5_PVT_BASE,
+	CA75_6_PVT_BASE,
+	CA75_7_PVT_BASE,
+	CA75_8_PVT_BASE,
+	CA75_9_PVT_BASE,
+	CA75_10_PVT_BASE,
+	CA75_11_PVT_BASE,
+	PCIE0_PVT_BASE,
+	PCIE1_PVT_BASE,
+	PCIE2_PVT_BASE,
+	PCIE3_PVT_BASE,
+	PCIE4_PVT_BASE,
+	DDR0_PVT_BASE,
+	DDR1_PVT_BASE,
+	DDR2_PVT_BASE,
+	DDR3_PVT_BASE,
+	DDR4_PVT_BASE,
+	DDR5_PVT_BASE
+};
+
+static const u8 pvt_i2c_count = ARRAY_SIZE(pvt_i2c_addr);
+
+struct pvt_i2c_data {
+	u8 pvt;
+	u8 reg;
+	u32 reg_val;
+	bool reg_val_valid;
+	u8 write_count;
+	spinlock_t lock;
+};
+
+static bool pvt_i2c_is_in_range(u8 reg)
+{
+	if ((reg >= 0 && reg <= 0x2b) ||
+	    (reg >= 0x40 && reg <= 0x43))
+		return true;
+
+	return false;
+}
+
+static u8 pvt_i2c_read_data(struct pvt_i2c_data *data)
+{
+	u8 *reg = &data->reg;
+
+	if (pvt_i2c_is_in_range(*reg)) {
+		if (!data->reg_val_valid) {
+			data->reg_val = pvt_i2c_readl(pvt_i2c_addr[data->pvt], (*reg >> 2) << 2);
+			data->reg_val_valid = true;
+		}
+
+		return ((u8 *)&data->reg_val)[*reg & 0x3];
+	}
+
+	data->reg_val_valid = false;
+	return 0;
+}
+
+static int pvt_i2c_slave_cb(struct i2c_client *client, enum i2c_slave_event event, u8 *val)
+{
+	struct pvt_i2c_data *data = i2c_get_clientdata(client);
+
+	switch (event) {
+	case I2C_SLAVE_WRITE_REQUESTED:
+		data->write_count = 0;
+		break;
+
+	case I2C_SLAVE_WRITE_RECEIVED:
+		if (data->write_count < 2) {
+			if (!data->write_count) {
+				data->reg = *val;
+				data->reg_val = 0;
+				data->reg_val_valid = false;
+			} else {
+				if (*val < pvt_i2c_count)
+					data->pvt = *val;
+			}
+			++data->write_count;
+		}
+		break;
+
+	case I2C_SLAVE_READ_PROCESSED:
+		/* The previous byte made it to the bus, get next one */
+		if ((data->reg & 0x3) == 3)
+			data->reg_val_valid = false;
+		++data->reg;
+		/* fallthrough */
+	case I2C_SLAVE_READ_REQUESTED:
+		spin_lock(&data->lock);
+		*val = pvt_i2c_read_data(data);
+		spin_unlock(&data->lock);
+		/*
+		 * Do not increment reg here, because we don't know if this
+		 * byte will be actually used. Read Linux I2C slave docs
+		 * for details.
+		 */
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int pvt_i2c_probe(struct i2c_client *client)
+{
+	struct pvt_i2c_data *data;
+	int ret;
+
+	data = devm_kzalloc(&client->dev, sizeof(struct pvt_i2c_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	spin_lock_init(&data->lock);
+	i2c_set_clientdata(client, data);
+
+	ret = i2c_slave_register(client, pvt_i2c_slave_cb);
+	return ret;
+};
+
+static int pvt_i2c_remove(struct i2c_client *client)
+{
+	i2c_slave_unregister(client);
+	return 0;
+}
+
+static const struct of_device_id pvt_i2c_of_match[] = {
+	{ .compatible = "baikal,bs1000-pvt-i2c" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, pvt_i2c_of_match);
+
+static const struct i2c_device_id pvt_i2c_id[] = {
+	{ "pvt-i2c" },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, pvt_i2c_id);
+
+static struct i2c_driver pvt_i2c_driver = {
+	.driver = {
+		.name = "pvt-i2c",
+		.of_match_table = of_match_ptr(pvt_i2c_of_match)
+	},
+	.probe_new = pvt_i2c_probe,
+	.remove = pvt_i2c_remove,
+	.id_table = pvt_i2c_id
+};
+module_i2c_driver(pvt_i2c_driver);
+
+MODULE_AUTHOR("Aleksandr Efimov <alexander.efimov@baikalelectronics.ru>");
+MODULE_DESCRIPTION("Baikal-S SoC I2C PVT driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwmon/pvt.c b/drivers/hwmon/pvt.c
new file mode 100644
index 0000000000000..67876d581d650
--- /dev/null
+++ b/drivers/hwmon/pvt.c
@@ -0,0 +1,867 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * An hwmon driver for Baikal PVT sensors based on Analog Bits.
+ * PVT Sensor Datasheet version: 2014.07.23
+ *
+ * Copyright (C) 2017-2023 Baikal Electronics, JSC
+ * Author: Maxim Kaurkin <maxim.kaurkin@baikalelectronics.ru>
+ */
+
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/mutex.h>
+#include <linux/sysfs.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/core.h>
+#include <linux/io.h>
+#include <linux/arm-smccc.h>
+
+#define BAIKAL_SMC_PVT_CMD	0x82000001
+#define PVT_READ		0
+#define PVT_WRITE		1
+
+/* "baikal,bm1000-pvt" base addresses */
+#define MMCA57_0_PVT_BASE	0x28200000
+#define MMCA57_1_PVT_BASE	0xc200000
+#define MMCA57_2_PVT_BASE	0xa200000
+#define MMCA57_3_PVT_BASE	0x26200000
+#define MMMALI_PVT_BASE		0x2a060000
+
+/* "baikal,bs1000-pvt" base addresses */
+#define CA75_0_PVT_BASE		0x4030000
+#define CA75_1_PVT_BASE		0x8030000
+#define CA75_2_PVT_BASE		0xc030000
+#define CA75_3_PVT_BASE		0x10030000
+#define CA75_4_PVT_BASE		0x14030000
+#define CA75_5_PVT_BASE		0x18030000
+#define CA75_6_PVT_BASE		0x1c030000
+#define CA75_7_PVT_BASE		0x20030000
+#define CA75_8_PVT_BASE		0x24030000
+#define CA75_9_PVT_BASE		0x28030000
+#define CA75_10_PVT_BASE	0x2c030000
+#define CA75_11_PVT_BASE	0x30030000
+#define PCIE0_PVT_BASE		0x38030000
+#define PCIE1_PVT_BASE		0x3c030000
+#define PCIE2_PVT_BASE		0x44030000
+#define PCIE3_PVT_BASE		0x48030000
+#define PCIE4_PVT_BASE		0x4c030000
+#define DDR0_PVT_BASE		0x50030000
+#define DDR1_PVT_BASE		0x54030000
+#define DDR2_PVT_BASE		0x58030000
+#define DDR3_PVT_BASE		0x60030000
+#define DDR4_PVT_BASE		0x64030000
+#define DDR5_PVT_BASE		0x68030000
+
+/* PVT registers */
+#define PVT_CTRL		0x00
+#define PVT_DATA		0x04
+#define PVT_TTHRES		0x08
+#define PVT_VTHRES		0x0c
+#define PVT_LTHRES		0x10
+#define PVT_ULTHRES		0x14
+#define PVT_STHRES		0x18
+#define PVT_TTIMEOUT		0x1c
+#define PVT_INTR_STAT		0x20
+#define PVT_INTR_MASK		0x24
+#define PVT_RAW_INTR_STAT	0x28
+#define PVT_CLR_INTR		0x2c
+#define PVT_CLKCH_CTL		0x40
+/* PVT VALID bit reads timeout */
+#define PVT_VALID_TIMEOUT	10000
+
+/* PVT values and masks */
+#define PVT_CTRL_EN		BIT(0)
+#define PVT_CTRL_TMOD		0x0
+#define PVT_CTRL_VMOD		0x2
+#define PVT_CTRL_LVTMOD		0b0100
+#define PVT_CTRL_HVTMOD		0b1000
+#define PVT_CTRL_SVTMOD		0b1100
+
+#define PVT_INTR_MASK_TVONLY	0x7e1
+#define PVT_INTR_MASK_ALL	0x7ff
+
+#define PVT_DATA_MASK		0x3ff
+#define PVT_DATA_VALID		BIT(10)
+
+#define PVT_THRES_HI		0xffc00
+#define PVT_THRES_LO		0x3ff
+
+#define PVT_TTIMEOUT_SET	10000000
+
+#define PVT_INTR_STAT_TTHRES_LO	BIT(1)
+#define PVT_INTR_STAT_TTHRES_HI	BIT(2)
+#define PVT_INTR_STAT_VTHRES_LO	BIT(3)
+#define PVT_INTR_STAT_VTHRES_HI	BIT(4)
+
+/* Temperature limits */
+#define TEMP_PVT_MAX		125000
+#define TEMP_PVT_MIN		-40000
+#define TEMP_PVT_WARN		67000
+#define TEMP_PVT_REBOOT		75000
+
+/* Voltage limits */
+#define VOLT_PVT_MIN		800
+#define VOLT_PVT_MAX		1000
+
+#define MAX_POLY_POWER		5
+
+struct pvt_poly_term {
+	unsigned deg;
+	long coef;
+	long divider;
+	long divider_leftover;
+};
+
+/*
+ * struct pvt_poly - PVT data translation polynomial descriptor
+ * @total_divider: total data divider
+ * @terms: polynomial terms up to a free one
+ */
+struct pvt_poly {
+	long total_divider;
+	struct pvt_poly_term terms[MAX_POLY_POWER];
+};
+
+enum chips { bm1000, bs1000, chips_number };
+
+static const struct pvt_poly poly_temp_to_N[chips_number] = {
+	[bm1000] = {
+		.total_divider = 10000,
+		.terms = {
+			{4,   18322, 10000, 10000},
+			{3,    2343, 10000,    10},
+			{2,   87018, 10000,    10},
+			{1,   39269,  1000,     1},
+			{0, 1720400,     1,     1}
+		}
+	},
+	[bs1000] = {
+		.total_divider = 10000,
+		.terms = {
+			{4,   12569, 10000, 10000},
+			{3,    2476, 10000,    10},
+			{2,   66309, 10000,    10},
+			{1,   36384,  1000,     1},
+			{0, 2070500,     1,     1}
+		}
+	}
+};
+
+static const struct pvt_poly poly_N_to_temp[chips_number] = {
+	[bm1000] = {
+		.total_divider = 1,
+		.terms = {
+			{4,  -16743, 1000, 1},
+			{3,   81542, 1000, 1},
+			{2, -182010, 1000, 1},
+			{1,  310200, 1000, 1},
+			{0,  -48380,    1, 1}
+		}
+	},
+	[bs1000] = {
+		.total_divider = 1,
+		.terms = {
+			{4,   16034, 1000, 1},
+			{3,   15608, 1000, 1},
+			{2, -150890, 1000, 1},
+			{1,  334080, 1000, 1},
+			{0,  -62861,    1, 1}
+		}
+	}
+};
+
+static const struct pvt_poly poly_volt_to_N[chips_number] = {
+	[bm1000] = {
+		.total_divider = 10,
+		.terms = {
+			{1,  18658, 1000, 1},
+			{0, -11572,    1, 1}
+		}
+	},
+	[bs1000] = {
+		.total_divider = 10,
+		.terms = {
+			{1, 16757, 1000, 1},
+			{0, -8564,    1, 1}
+		}
+	}
+};
+
+static const struct pvt_poly poly_N_to_volt[chips_number] = {
+	[bm1000] = {
+		.total_divider = 10,
+		.terms = {
+			{1,    100000, 18658,     1},
+			{0, 115720000,     1, 18658}
+		}
+	},
+	[bs1000] = {
+		.total_divider = 10,
+		.terms = {
+			{1,   100000, 16757,     1},
+			{0, 85639000,     1, 16757}
+		}
+	}
+};
+
+/*
+ * Here is the polynomial calculation function, which performs the
+ * redistributed terms calculations. It's pretty straightforward. We walk
+ * over each degree term up to the free one, and perform the redistributed
+ * multiplication of the term coefficient, its divider (as for the rationale
+ * fraction representation), data power and the rational fraction divider
+ * leftover. Then all of this is collected in a total sum variable, which
+ * value is normalized by the total divider before being returned.
+ */
+static long pvt_calc_poly(const struct pvt_poly *poly, long data)
+{
+	const struct pvt_poly_term *term = poly->terms;
+	long tmp, ret = 0;
+	int deg;
+
+	do {
+		tmp = term->coef;
+		for (deg = 0; deg < term->deg; ++deg) {
+			tmp = mult_frac(tmp, data, term->divider);
+		}
+
+		ret += tmp / term->divider_leftover;
+	} while ((term++)->deg);
+
+	return ret / poly->total_divider;
+}
+
+static uint32_t writel_pvt(uint32_t val, uint32_t pvt_base, uint32_t offset)
+{
+	struct arm_smccc_res res;
+
+	arm_smccc_smc(BAIKAL_SMC_PVT_CMD, PVT_WRITE, pvt_base, offset, val,
+		      0, 0, 0, &res);
+
+	return res.a0;
+}
+
+static uint32_t readl_pvt(uint32_t pvt_base, uint32_t offset)
+{
+	struct arm_smccc_res res;
+
+	arm_smccc_smc(BAIKAL_SMC_PVT_CMD, PVT_READ, pvt_base, offset,
+		      0, 0, 0, 0, &res);
+
+	return res.a0;
+}
+
+struct pvt_hwmon {
+	int base;
+	enum chips type_cpu;
+	int irq;
+	const struct mfd_cell *cell;
+	struct device *hwmon;
+	struct completion read_completion;
+	struct mutex lock;
+	int temp;
+	int volt;
+	int svt;
+	int hvt;
+	int lvt;
+	bool mon_mod;
+};
+
+static void switch_to_mon_mod(struct pvt_hwmon *hwmon)
+{
+	/* PVT off */
+	writel_pvt(0, hwmon->base, PVT_CTRL);
+	/* Set timeout of interrupts */
+	writel_pvt(PVT_TTIMEOUT_SET, hwmon->base, PVT_TTIMEOUT);
+	pr_debug("PVT switch_to_mon_mod and set PVT_TTIMEOUT %d\n",
+		 readl_pvt(hwmon->base, PVT_TTIMEOUT));
+
+	/* Mask all interrupts except TTRES_HILO */
+	writel_pvt(PVT_INTR_MASK_TVONLY, hwmon->base, PVT_INTR_MASK);
+	/* Switch to last VOLT or temperature mon_mod */
+	writel_pvt(hwmon->mon_mod << 1, hwmon->base, PVT_CTRL);
+	pr_debug("PVT switch_to_mon_mod and set PVT_CTRL %d\n",
+		 readl_pvt(hwmon->base, PVT_CTRL));
+
+	/* PVT on */
+	writel_pvt(PVT_CTRL_EN | (hwmon->mon_mod << 1), hwmon->base, PVT_CTRL);
+}
+
+static int read_valid_datareg(struct pvt_hwmon *hwmon)
+{
+	register int data, i = 0;
+	data = readl_pvt(hwmon->base, PVT_DATA);
+	data = 0;
+	while (!(data & PVT_DATA_VALID)) {
+		data = readl_pvt(hwmon->base, PVT_DATA);
+		if (++i == PVT_VALID_TIMEOUT) {
+			return -EINVAL;
+		}
+	}
+
+	data &= PVT_DATA_MASK;
+	switch_to_mon_mod(hwmon);
+	return data;
+}
+
+static void switch_pvt_mod(int pvt_base, long mod)
+{
+	pr_debug("PVT now 0x%x but need 0x%lx\n",
+		 readl_pvt(pvt_base, PVT_CTRL), (unsigned long)mod);
+
+	writel_pvt(0, pvt_base, PVT_CTRL);
+	/* Set timeout of PVT measurement */
+	writel_pvt(0, pvt_base, PVT_TTIMEOUT);
+	/* Mask all interrupts */
+	writel_pvt(PVT_INTR_MASK_ALL, pvt_base, PVT_INTR_MASK);
+	writel_pvt(mod, pvt_base, PVT_CTRL);
+	writel_pvt(PVT_CTRL_EN | mod, pvt_base, PVT_CTRL);
+	pr_debug("PVT MOD 0x%x\n", readl_pvt(pvt_base, PVT_CTRL));
+}
+
+static long temp2data(int temp, int tp)
+{
+	if (temp > TEMP_PVT_MAX) {
+		temp = TEMP_PVT_MAX;
+	}
+
+	if (temp < TEMP_PVT_MIN) {
+		temp = TEMP_PVT_MIN;
+	}
+
+	return pvt_calc_poly(&poly_temp_to_N[tp], temp);
+}
+
+static long volt2data(int volt, int tp)
+{
+	if (volt > VOLT_PVT_MAX) {
+		volt = VOLT_PVT_MAX;
+	}
+
+	if (volt < VOLT_PVT_MIN) {
+		volt = VOLT_PVT_MIN;
+	}
+
+	return pvt_calc_poly(&poly_volt_to_N[tp], volt);
+}
+
+static long data2temp(int data, int tp)
+{
+	return pvt_calc_poly(&poly_N_to_temp[tp], data);
+}
+
+static long data2volt(int data, int tp)
+{
+	return pvt_calc_poly(&poly_N_to_volt[tp], data);
+}
+
+static irqreturn_t pvt_hwmon_irq(int irq, void *data)
+{
+	long val;
+	int tmp, temp;
+	struct pvt_hwmon *hwmon = data;
+
+	val = readl_pvt(hwmon->base, PVT_INTR_STAT);
+	if (PVT_INTR_STAT_TTHRES_LO & val) {
+		printk(KERN_INFO "PVT WARNING Lo temperature\n");
+	}
+
+	if (PVT_INTR_STAT_TTHRES_HI & val) {
+		switch_pvt_mod(hwmon->base, PVT_CTRL_TMOD);
+		tmp = read_valid_datareg(hwmon);
+		temp = data2temp(tmp, hwmon->type_cpu);
+		printk(KERN_INFO "PVT WARNING Hi temperature %d\n", temp);
+		if (temp > TEMP_PVT_REBOOT) {
+			printk(KERN_INFO "PVT TOO Hi temperature (%d > %d), pm_power_off!\n",
+			       temp, TEMP_PVT_REBOOT);
+
+			pm_power_off();
+		}
+	}
+
+	if (PVT_INTR_STAT_VTHRES_LO & val) {
+		printk(KERN_INFO "PVT WARNING Lo voltage\n");
+	}
+
+	if (PVT_INTR_STAT_VTHRES_HI & val) {
+		printk(KERN_INFO "PVT WARNING Lo voltage\n");
+	}
+
+	val = readl_pvt(hwmon->base, PVT_CLR_INTR);
+	complete(&hwmon->read_completion);
+	return IRQ_HANDLED;
+}
+
+static ssize_t pvt_show_name(struct device *dev,
+			     struct device_attribute *dev_attr, char *buf)
+{
+	struct pvt_hwmon *hwmon = dev_get_drvdata(dev);
+	return sprintf(buf, "pvt@%x\n", hwmon->base);
+}
+
+static ssize_t pvt_show_mon_mod(struct device *dev,
+				struct device_attribute *dev_attr, char *buf)
+{
+	struct pvt_hwmon *hwmon = dev_get_drvdata(dev);
+	return sprintf(buf, "%d\n", hwmon->mon_mod);
+}
+
+static ssize_t set_mon_mod(struct device *dev, struct device_attribute *devattr,
+			   const char *buf, size_t count)
+{
+	int err;
+	long data;
+	struct pvt_hwmon *hwmon = dev_get_drvdata(dev);
+
+	err = kstrtol(buf, 10, &data);
+	if (err) {
+		return err;
+	}
+
+	mutex_lock(&hwmon->lock);
+	hwmon->mon_mod = data;
+	switch_to_mon_mod(hwmon);
+	mutex_unlock(&hwmon->lock);
+	return count;
+}
+
+/* sysfs attributes for hwmon */
+static ssize_t pvt_show_temp(struct device *dev, struct device_attribute *da,
+			     char *buf)
+{
+	struct pvt_hwmon *hwmon = dev_get_drvdata(dev);
+	int data, temp;
+
+	mutex_lock(&hwmon->lock);
+	switch_pvt_mod(hwmon->base, PVT_CTRL_TMOD);
+	data = read_valid_datareg(hwmon);
+	temp = data2temp(data, hwmon->type_cpu);
+	hwmon->temp = temp;
+	mutex_unlock(&hwmon->lock);
+	return sprintf(buf, "%d\n", temp);
+}
+
+static ssize_t set_temp_min(struct device *dev, struct device_attribute *devattr,
+			    const char *buf, size_t count)
+{
+	unsigned val, data;
+	long temp;
+	int err;
+	struct pvt_hwmon *hwmon = dev_get_drvdata(dev);
+
+	err = kstrtol(buf, 10, &temp);
+	if (err) {
+		return err;
+	}
+
+	mutex_lock(&hwmon->lock);
+	data = readl_pvt(hwmon->base, PVT_TTHRES);
+	val = temp2data(temp, hwmon->type_cpu);
+	data = (data & PVT_THRES_HI) + (PVT_THRES_LO & val);
+	writel_pvt(data, hwmon->base, PVT_TTHRES);
+	mutex_unlock(&hwmon->lock);
+	return count;
+}
+
+static ssize_t set_temp_max(struct device *dev, struct device_attribute *devattr,
+			    const char *buf, size_t count)
+{
+	unsigned val, data;
+	long temp;
+	int err;
+	struct pvt_hwmon *hwmon = dev_get_drvdata(dev);
+
+	err = kstrtol(buf, 10, &temp);
+	if (err) {
+		return err;
+	}
+
+	mutex_lock(&hwmon->lock);
+	data = readl_pvt(hwmon->base, PVT_TTHRES);
+	val = temp2data(temp, hwmon->type_cpu);
+	data = ((val << 10) & PVT_THRES_HI) + (PVT_THRES_LO & data);
+	writel_pvt(data, hwmon->base, PVT_TTHRES);
+	mutex_unlock(&hwmon->lock);
+	return count;
+}
+
+static ssize_t set_volt_min(struct device *dev, struct device_attribute *devattr,
+			    const char *buf, size_t count)
+{
+	unsigned val, data;
+	long volt;
+	int err;
+	struct pvt_hwmon *hwmon = dev_get_drvdata(dev);
+
+	err = kstrtol(buf, 10, &volt);
+	if (err) {
+		return err;
+	}
+
+	mutex_lock(&hwmon->lock);
+	data = readl_pvt(hwmon->base, PVT_VTHRES);
+	val = volt2data(volt, hwmon->type_cpu);
+	data = (data & PVT_THRES_HI) + (PVT_THRES_LO & val);
+	writel_pvt(data, hwmon->base, PVT_VTHRES);
+	mutex_unlock(&hwmon->lock);
+	return count;
+}
+
+static ssize_t set_volt_max(struct device *dev, struct device_attribute *devattr,
+			    const char *buf, size_t count)
+{
+	unsigned val, data;
+	long volt;
+	int err;
+	struct pvt_hwmon *hwmon = dev_get_drvdata(dev);
+
+	err = kstrtol(buf, 10, &volt);
+	if (err) {
+		return err;
+	}
+
+	mutex_lock(&hwmon->lock);
+	data = readl_pvt(hwmon->base, PVT_VTHRES);
+	val = volt2data(volt, hwmon->type_cpu);
+	pr_debug("PVT set volt max %ld and val 0x%x\n", volt, val);
+	data = ((val << 10) & PVT_THRES_HI) + (PVT_THRES_LO & data);
+	writel_pvt(data, hwmon->base, PVT_VTHRES);
+	mutex_unlock(&hwmon->lock);
+	return count;
+}
+
+static ssize_t pvt_show_temp_min(struct device *dev, struct device_attribute *devattr,
+				 char *buf)
+{
+	unsigned val, data;
+	long temp;
+	struct pvt_hwmon *hwmon = dev_get_drvdata(dev);
+
+	mutex_lock(&hwmon->lock);
+	val = readl_pvt(hwmon->base, PVT_TTHRES);
+	data = PVT_THRES_LO & val;
+	temp = data2temp(data, hwmon->type_cpu);
+	mutex_unlock(&hwmon->lock);
+	return sprintf(buf, "%ld\n", temp);
+}
+
+static ssize_t pvt_show_temp_max(struct device *dev, struct device_attribute *devattr,
+				 char *buf)
+{
+	long val, data, temp;
+	struct pvt_hwmon *hwmon = dev_get_drvdata(dev);
+
+	mutex_lock(&hwmon->lock);
+	val = readl_pvt(hwmon->base, PVT_TTHRES);
+	data = (PVT_THRES_HI & val) >> 10;
+	temp = data2temp(data, hwmon->type_cpu);
+	mutex_unlock(&hwmon->lock);
+	return sprintf(buf, "%ld\n", temp);
+}
+
+static ssize_t pvt_show_volt_min(struct device *dev, struct device_attribute *devattr,
+				 char *buf)
+{
+	unsigned val;
+	long volt, data;
+	struct pvt_hwmon *hwmon = dev_get_drvdata(dev);
+
+	mutex_lock(&hwmon->lock);
+	val = readl_pvt(hwmon->base, PVT_VTHRES);
+	data = PVT_THRES_LO & val;
+	volt = data2volt(data, hwmon->type_cpu);
+	mutex_unlock(&hwmon->lock);
+	return sprintf(buf, "%ld\n", volt);
+}
+
+static ssize_t pvt_show_volt_max(struct device *dev, struct device_attribute *devattr,
+				 char *buf)
+{
+	unsigned val, data;
+	int volt;
+	struct pvt_hwmon *hwmon = dev_get_drvdata(dev);
+
+	mutex_lock(&hwmon->lock);
+	val = readl_pvt(hwmon->base, PVT_VTHRES);
+	data = (PVT_THRES_HI & val) >> 10;
+	volt = data2volt(data, hwmon->type_cpu);
+	mutex_unlock(&hwmon->lock);
+	return sprintf(buf, "%d\n", volt);
+}
+
+static ssize_t pvt_show_voltage(struct device *dev, struct device_attribute *da,
+				char *buf)
+{
+	struct pvt_hwmon *hwmon = dev_get_drvdata(dev);
+	int data, volt;
+
+	mutex_lock(&hwmon->lock);
+	switch_pvt_mod(hwmon->base, PVT_CTRL_VMOD);
+	data = read_valid_datareg(hwmon);
+	volt = data2volt(data, hwmon->type_cpu);
+	hwmon->volt = volt;
+	mutex_unlock(&hwmon->lock);
+	return sprintf(buf, "%d\n", volt);
+}
+
+static ssize_t lvt_show(struct device *dev, struct device_attribute *da,
+			char *buf)
+{
+	struct pvt_hwmon *hwmon = dev_get_drvdata(dev);
+	int data;
+
+	mutex_lock(&hwmon->lock);
+	switch_pvt_mod(hwmon->base, PVT_CTRL_LVTMOD);
+	data = read_valid_datareg(hwmon);
+	mutex_unlock(&hwmon->lock);
+	return sprintf(buf, "%d\n", data);
+}
+
+static ssize_t hvt_show(struct device *dev, struct device_attribute *da,
+			char *buf)
+{
+	struct pvt_hwmon *hwmon = dev_get_drvdata(dev);
+	int data;
+
+	mutex_lock(&hwmon->lock);
+	switch_pvt_mod(hwmon->base, PVT_CTRL_HVTMOD);
+	data = read_valid_datareg(hwmon);
+	mutex_unlock(&hwmon->lock);
+	return sprintf(buf, "%d\n", data);
+}
+
+static ssize_t svt_show(struct device *dev, struct device_attribute *da,
+			char *buf)
+{
+	struct pvt_hwmon *hwmon = dev_get_drvdata(dev);
+	int data;
+
+	mutex_lock(&hwmon->lock);
+	switch_pvt_mod(hwmon->base, PVT_CTRL_SVTMOD);
+	data = read_valid_datareg(hwmon);
+	mutex_unlock(&hwmon->lock);
+	return sprintf(buf, "%d\n", data);
+}
+
+static DEVICE_ATTR(name, S_IRUGO, pvt_show_name, NULL);
+static DEVICE_ATTR(temp1_input, S_IRUGO, pvt_show_temp, NULL);
+static DEVICE_ATTR(in1_input, S_IRUGO, pvt_show_voltage, NULL);
+static DEVICE_ATTR(lvt_input, S_IRUGO, lvt_show, NULL);
+static DEVICE_ATTR(hvt_input, S_IRUGO, hvt_show, NULL);
+static DEVICE_ATTR(svt_input, S_IRUGO, svt_show, NULL);
+static DEVICE_ATTR(temp1_min, S_IWUSR | S_IRUGO, pvt_show_temp_min, set_temp_min);
+static DEVICE_ATTR(temp1_max, S_IWUSR | S_IRUGO, pvt_show_temp_max, set_temp_max);
+static DEVICE_ATTR(in1_min, S_IWUSR | S_IRUGO, pvt_show_volt_min, set_volt_min);
+static DEVICE_ATTR(in1_max, S_IWUSR | S_IRUGO, pvt_show_volt_max, set_volt_max);
+static DEVICE_ATTR(mon_mod, S_IWUSR | S_IRUGO, pvt_show_mon_mod, set_mon_mod);
+
+static struct attribute *pvt_attrs[] = {
+	&dev_attr_name.attr,
+	&dev_attr_temp1_input.attr,
+	&dev_attr_temp1_min.attr,
+	&dev_attr_temp1_max.attr,
+	&dev_attr_in1_input.attr,
+	&dev_attr_in1_min.attr,
+	&dev_attr_in1_max.attr,
+	&dev_attr_lvt_input.attr,
+	&dev_attr_hvt_input.attr,
+	&dev_attr_svt_input.attr,
+	&dev_attr_mon_mod.attr,
+	NULL
+};
+
+static const struct attribute_group pvt_attr_group = {
+	.attrs = pvt_attrs
+};
+
+static int pvt_probe(struct platform_device *pdev)
+{
+	int ret, hwmon_type_cpu;
+	unsigned val, data;
+	struct pvt_hwmon *hwmon;
+	struct resource *mem;
+
+	hwmon = devm_kzalloc(&pdev->dev, sizeof(*hwmon), GFP_KERNEL);
+	if (!hwmon) {
+		return -ENOMEM;
+	}
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	hwmon->base = (int)mem->start;
+
+	/*
+	 * Set PVT ID for secure monitor calls.
+	 * TODO: it is a legacy awkward design because
+	 *	 secure monitor has to perform the opposite conversion.
+	 */
+	hwmon_type_cpu = (enum chips)device_get_match_data(&pdev->dev);
+	if (hwmon->type_cpu == bm1000 &&
+		(hwmon->base == MMCA57_0_PVT_BASE ||
+		 hwmon->base == MMCA57_1_PVT_BASE ||
+		 hwmon->base == MMCA57_2_PVT_BASE ||
+		 hwmon->base == MMCA57_3_PVT_BASE ||
+		 hwmon->base == MMMALI_PVT_BASE)) {
+		hwmon->type_cpu = bm1000;
+	} else if (hwmon_type_cpu == bs1000 &&
+		(hwmon->base == CA75_0_PVT_BASE	 ||
+		 hwmon->base == CA75_1_PVT_BASE	 ||
+		 hwmon->base == CA75_2_PVT_BASE	 ||
+		 hwmon->base == CA75_3_PVT_BASE	 ||
+		 hwmon->base == CA75_4_PVT_BASE	 ||
+		 hwmon->base == CA75_5_PVT_BASE	 ||
+		 hwmon->base == CA75_6_PVT_BASE	 ||
+		 hwmon->base == CA75_7_PVT_BASE	 ||
+		 hwmon->base == CA75_8_PVT_BASE	 ||
+		 hwmon->base == CA75_9_PVT_BASE	 ||
+		 hwmon->base == CA75_10_PVT_BASE ||
+		 hwmon->base == CA75_11_PVT_BASE ||
+		 hwmon->base == DDR0_PVT_BASE	 ||
+		 hwmon->base == DDR1_PVT_BASE	 ||
+		 hwmon->base == DDR2_PVT_BASE	 ||
+		 hwmon->base == DDR3_PVT_BASE	 ||
+		 hwmon->base == DDR4_PVT_BASE	 ||
+		 hwmon->base == DDR5_PVT_BASE	 ||
+		 hwmon->base == PCIE0_PVT_BASE	 ||
+		 hwmon->base == PCIE1_PVT_BASE	 ||
+		 hwmon->base == PCIE2_PVT_BASE	 ||
+		 hwmon->base == PCIE3_PVT_BASE	 ||
+		 hwmon->base == PCIE4_PVT_BASE)) {
+		hwmon->type_cpu = bs1000;
+	} else {
+		dev_err(&pdev->dev, "invalid base address or failed to get type: 0x%x\n", hwmon->base);
+		return -EINVAL;
+	}
+
+	hwmon->cell = mfd_get_cell(pdev);
+	if (!(hwmon->type_cpu == bs1000 &&
+		(hwmon->base == PCIE0_PVT_BASE	 ||
+		 hwmon->base == PCIE1_PVT_BASE	 ||
+		 hwmon->base == PCIE2_PVT_BASE	 ||
+		 hwmon->base == CA75_0_PVT_BASE	 ||
+		 hwmon->base == CA75_1_PVT_BASE	 ||
+		 hwmon->base == CA75_2_PVT_BASE	 ||
+		 hwmon->base == CA75_3_PVT_BASE	 ||
+		 hwmon->base == CA75_4_PVT_BASE	 ||
+		 hwmon->base == CA75_5_PVT_BASE	 ||
+		 hwmon->base == CA75_6_PVT_BASE	 ||
+		 hwmon->base == CA75_7_PVT_BASE	 ||
+		 hwmon->base == CA75_8_PVT_BASE	 ||
+		 hwmon->base == CA75_9_PVT_BASE	 ||
+		 hwmon->base == CA75_10_PVT_BASE ||
+		 hwmon->base == CA75_11_PVT_BASE))) {
+		hwmon->irq = platform_get_irq(pdev, 0);
+		if (hwmon->irq < 0) {
+			dev_err(&pdev->dev, "failed to get platform irq: %d\n",
+				hwmon->irq);
+		}
+	}
+
+	init_completion(&hwmon->read_completion);
+	mutex_init(&hwmon->lock);
+
+	/* Mask all interrupts except TTRES_HILO */
+	writel_pvt(PVT_INTR_MASK_TVONLY, hwmon->base, PVT_INTR_MASK);
+	pr_debug("pvt_probe PVT_INTR_MASK 0x%x\n",
+		 readl_pvt(hwmon->base, PVT_INTR_MASK));
+
+	/* Set timeout of PVT measurement */
+	writel_pvt(PVT_TTIMEOUT_SET, hwmon->base, PVT_TTIMEOUT);
+	pr_debug("pvt_probe PVT_TTIMEOUT %d\n",
+		 readl_pvt(hwmon->base, PVT_TTIMEOUT));
+
+	platform_set_drvdata(pdev, hwmon);
+
+	/*
+	 * TODO: it is workaround for BE-S1000 PCIe[0-2] PVTs.
+	 *	 These PVTs generate infinite interrupts. Why?
+	 *	 The issue should be researched thoroughly.
+	 */
+	if (!(hwmon->type_cpu == bs1000 &&
+	      (hwmon->base == PCIE0_PVT_BASE  ||
+	       hwmon->base == PCIE1_PVT_BASE  ||
+	       hwmon->base == PCIE2_PVT_BASE  ||
+	       hwmon->base == CA75_0_PVT_BASE ||
+	       hwmon->base == CA75_1_PVT_BASE ||
+	       hwmon->base == CA75_2_PVT_BASE ||
+	       hwmon->base == CA75_3_PVT_BASE ||
+	       hwmon->base == CA75_4_PVT_BASE ||
+	       hwmon->base == CA75_5_PVT_BASE ||
+	       hwmon->base == CA75_6_PVT_BASE ||
+	       hwmon->base == CA75_7_PVT_BASE ||
+	       hwmon->base == CA75_8_PVT_BASE ||
+	       hwmon->base == CA75_9_PVT_BASE ||
+	       hwmon->base == CA75_10_PVT_BASE ||
+	       hwmon->base == CA75_11_PVT_BASE))) {
+		ret = devm_request_irq(&pdev->dev, hwmon->irq, pvt_hwmon_irq, 0,
+					pdev->name, hwmon);
+		if (ret) {
+			dev_err(&pdev->dev, "failed to request irq: %d\n", ret);
+			/* TODO: ncdofail if no IRQ return ret */
+		}
+	}
+
+	ret = sysfs_create_group(&pdev->dev.kobj, &pvt_attr_group);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to create sysfs group: %d\n", ret);
+		return ret;
+	}
+
+	hwmon->hwmon = hwmon_device_register_with_info(&pdev->dev, pdev->name,
+							hwmon, NULL, NULL);
+	if (IS_ERR(hwmon->hwmon)) {
+		ret = PTR_ERR(hwmon->hwmon);
+		goto err_remove_file;
+	}
+
+	/* Set WARN temperature */
+	mutex_lock(&hwmon->lock);
+	data = readl_pvt(hwmon->base, PVT_TTHRES);
+	val = temp2data(TEMP_PVT_WARN, hwmon->type_cpu);
+	data = ((val << 10) & PVT_THRES_HI) + (PVT_THRES_LO & data);
+	writel_pvt(data, hwmon->base, PVT_TTHRES);
+	mutex_unlock(&hwmon->lock);
+	/* Set monitoring mod for temperature */
+	hwmon->mon_mod = 0;
+	switch_to_mon_mod(hwmon);
+	pr_debug("pvt_probe hwmon_device_register %d\n", ret);
+	return 0;
+
+err_remove_file:
+	sysfs_remove_group(&pdev->dev.kobj, &pvt_attr_group);
+	return ret;
+}
+
+static int pvt_remove(struct platform_device *pdev)
+{
+	struct pvt_hwmon *hwmon = platform_get_drvdata(pdev);
+
+	hwmon_device_unregister(hwmon->hwmon);
+	sysfs_remove_group(&pdev->dev.kobj, &pvt_attr_group);
+	return 0;
+}
+
+static const struct of_device_id pvt_dt_match[] = {
+	{ .compatible = "baikal,bm1000-pvt", .data = (void *)bm1000 },
+	{ .compatible = "baikal,bs1000-pvt", .data = (void *)bs1000 },
+	{ }
+};
+
+static struct platform_driver pvt_hwmon_driver = {
+	.probe	= pvt_probe,
+	.remove	= pvt_remove,
+	.driver	= {
+		.name = "pvt-hwmon",
+		.of_match_table = of_match_ptr(pvt_dt_match)
+	}
+};
+module_platform_driver(pvt_hwmon_driver);
+
+MODULE_DESCRIPTION("Baikal PVT driver");
+MODULE_AUTHOR("Maxim Kaurkin <maxim.kaurkin@baikalelectronics.ru>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:pvt-hwmon");
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 854f1b2658b82..31af9fd8d731d 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -437,6 +437,9 @@ config I2C_AXXIA
 	  255 bytes in length. Any attempt to to a larger transfer will return
 	  an error.
 
+config I2C_BAIKAL_SMBUS
+	tristate "Baikal-M SoC SMBus interface"
+
 config I2C_BCM2835
 	tristate "Broadcom BCM2835 I2C controller"
 	depends on ARCH_BCM2835 || ARCH_BRCMSTB
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 3ab8aebc39c90..f926b21e12ad8 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -42,6 +42,7 @@ ifeq ($(CONFIG_I2C_AT91_SLAVE_EXPERIMENTAL),y)
 endif
 obj-$(CONFIG_I2C_AU1550)	+= i2c-au1550.o
 obj-$(CONFIG_I2C_AXXIA)		+= i2c-axxia.o
+obj-$(CONFIG_I2C_BAIKAL_SMBUS)	+= i2c-baikal-smbus.o
 obj-$(CONFIG_I2C_BCM2835)	+= i2c-bcm2835.o
 obj-$(CONFIG_I2C_BCM_IPROC)	+= i2c-bcm-iproc.o
 obj-$(CONFIG_I2C_CADENCE)	+= i2c-cadence.o
diff --git a/drivers/i2c/busses/i2c-baikal-smbus.c b/drivers/i2c/busses/i2c-baikal-smbus.c
new file mode 100644
index 0000000000000..54e3cde292350
--- /dev/null
+++ b/drivers/i2c/busses/i2c-baikal-smbus.c
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * SMBus controller driver for Baikal SoC
+ *
+ * Copyright (C) 2019-2023 Baikal Electronics, JSC
+ * Author: Georgy Vlasov <georgy.vlasov@baikalelectronics.ru>
+ */
+
+#include <linux/clk.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#define BAIKAL_SMBUS_CR1	0x00
+#define BAIKAL_CR1_IRT		BIT(0)
+#define BAIKAL_CR1_TRS		BIT(1)
+#define BAIKAL_CR1_IEB		BIT(3)
+
+#define BAIKAL_SMBUS_CR2	0x04
+#define BAIKAL_CR2_FTE		BIT(1)
+
+#define BAIKAL_SMBUS_FBCR1	0x08
+#define BAIKAL_SMBUS_FIFO	0x0c
+#define BAIKAL_SMBUS_SCD1	0x10
+
+#define BAIKAL_SMBUS_SCD2	0x14
+#define BAIKAL_SCD2_MASK	0x03
+#define BAIKAL_SCD2_SHT		BIT(7)
+
+#define BAIKAL_SMBUS_ADR1	0x18
+
+#define BAIKAL_SMBUS_ISR1	0x20
+#define BAIKAL_ISR1_MASK	0x7f
+#define BAIKAL_ISR1_FER		BIT(2)
+#define BAIKAL_ISR1_RNK		BIT(3)
+#define BAIKAL_ISR1_ALD		BIT(4)
+#define BAIKAL_ISR1_TCS		BIT(6)
+
+#define BAIKAL_SMBUS_IMR1	0x24
+#define BAIKAL_SMBUS_FBCR2	0x2c
+#define BAIKAL_SMBUS_IMR2	0x48
+
+#define BAIKAL_SMBUS_FIFO_SIZE	16
+
+struct baikal_smbus_dev {
+	struct device		*dev;
+	void __iomem		*base;
+	struct clk		*smbus_clk;
+	struct i2c_adapter	adapter;
+	u32			bus_clk_rate;
+	u64			smbus_clk_rate;
+};
+
+static int baikal_smbus_init(struct baikal_smbus_dev *smbus)
+{
+	u32 divider = (smbus->smbus_clk_rate == 0 ?
+			clk_get_rate(smbus->smbus_clk) :
+			smbus->smbus_clk_rate) / smbus->bus_clk_rate - 1;
+
+	writel(BAIKAL_CR1_IRT, smbus->base + BAIKAL_SMBUS_CR1);
+	writel(0, smbus->base + BAIKAL_SMBUS_CR1);
+	writel(0, smbus->base + BAIKAL_SMBUS_CR2);
+	writel(0, smbus->base + BAIKAL_SMBUS_FBCR2);
+	writel(0, smbus->base + BAIKAL_SMBUS_IMR1);
+	writel(0, smbus->base + BAIKAL_SMBUS_IMR2);
+	writel(divider & 0xff, smbus->base + BAIKAL_SMBUS_SCD1);
+	writel(BAIKAL_SCD2_SHT | ((divider >> 8) & BAIKAL_SCD2_MASK),
+	       smbus->base + BAIKAL_SMBUS_SCD2);
+
+	return 0;
+}
+
+static int baikal_smbus_xfer(struct i2c_adapter *adap, u16 addr,
+			     unsigned short flags, char read_write,
+			     u8 command, int size, union i2c_smbus_data *data)
+{
+	struct baikal_smbus_dev *const smbus = adap->dev.driver_data;
+	int ret = -EINVAL;
+
+	switch (size) {
+	case I2C_SMBUS_BYTE:
+		writel(BAIKAL_CR1_IRT, smbus->base + BAIKAL_SMBUS_CR1);
+		writel(0, smbus->base + BAIKAL_SMBUS_CR1);
+		writel(addr, smbus->base + BAIKAL_SMBUS_ADR1);
+		writel(1, smbus->base + BAIKAL_SMBUS_FBCR1);
+
+		if (read_write == I2C_SMBUS_WRITE) {
+			writel(BAIKAL_CR1_TRS | BAIKAL_CR1_IEB,
+			       smbus->base + BAIKAL_SMBUS_CR1);
+			writel(command, smbus->base + BAIKAL_SMBUS_FIFO);
+		} else {
+			writel(BAIKAL_CR1_IEB, smbus->base + BAIKAL_SMBUS_CR1);
+		}
+
+		writel(BAIKAL_ISR1_MASK, smbus->base + BAIKAL_SMBUS_ISR1);
+		writel(BAIKAL_CR2_FTE, smbus->base + BAIKAL_SMBUS_CR2);
+
+		while ((readl(smbus->base + BAIKAL_SMBUS_CR2) & BAIKAL_CR2_FTE) &&
+		       !(readl(smbus->base + BAIKAL_SMBUS_ISR1) &
+			(BAIKAL_ISR1_TCS | BAIKAL_ISR1_ALD | BAIKAL_ISR1_RNK)))
+			;
+
+		if (readl(smbus->base + BAIKAL_SMBUS_ISR1) &
+			(BAIKAL_ISR1_ALD | BAIKAL_ISR1_RNK)) {
+			ret = -ENXIO;
+			goto exit;
+		}
+
+		if (read_write == I2C_SMBUS_READ) {
+			data->byte = readl(smbus->base + BAIKAL_SMBUS_FIFO);
+
+			if (readl(smbus->base + BAIKAL_SMBUS_ISR1) & BAIKAL_ISR1_FER) {
+				ret = -EMSGSIZE;
+				goto exit;
+			}
+		}
+
+		ret = 0;
+		break;
+	case I2C_SMBUS_BYTE_DATA:
+		writel(BAIKAL_CR1_IRT, smbus->base + BAIKAL_SMBUS_CR1);
+		writel(0, smbus->base + BAIKAL_SMBUS_CR1);
+		writel(addr, smbus->base + BAIKAL_SMBUS_ADR1);
+		writel(BAIKAL_CR1_TRS | BAIKAL_CR1_IEB, smbus->base + BAIKAL_SMBUS_CR1);
+
+		if (read_write == I2C_SMBUS_WRITE) {
+			writel(2, smbus->base + BAIKAL_SMBUS_FBCR1);
+			writel(command, smbus->base + BAIKAL_SMBUS_FIFO);
+			writel(data->byte, smbus->base + BAIKAL_SMBUS_FIFO);
+		} else {
+			writel(1, smbus->base + BAIKAL_SMBUS_FBCR1);
+			writel(command, smbus->base + BAIKAL_SMBUS_FIFO);
+		}
+
+		writel(BAIKAL_ISR1_MASK, smbus->base + BAIKAL_SMBUS_ISR1);
+		writel(BAIKAL_CR2_FTE, smbus->base + BAIKAL_SMBUS_CR2);
+
+		while ((readl(smbus->base + BAIKAL_SMBUS_CR2) & BAIKAL_CR2_FTE) &&
+		       !(readl(smbus->base + BAIKAL_SMBUS_ISR1) &
+			(BAIKAL_ISR1_TCS | BAIKAL_ISR1_ALD | BAIKAL_ISR1_RNK)))
+			;
+
+		if (readl(smbus->base + BAIKAL_SMBUS_ISR1) &
+			(BAIKAL_ISR1_ALD | BAIKAL_ISR1_RNK)) {
+			ret = -ENXIO;
+			goto exit;
+		}
+
+		if (read_write == I2C_SMBUS_READ) {
+			writel(BAIKAL_CR1_IEB, smbus->base + BAIKAL_SMBUS_CR1);
+			writel(1, smbus->base + BAIKAL_SMBUS_FBCR1);
+			writel(BAIKAL_ISR1_MASK, smbus->base + BAIKAL_SMBUS_ISR1);
+			writel(BAIKAL_CR2_FTE, smbus->base + BAIKAL_SMBUS_CR2);
+
+			while ((readl(smbus->base + BAIKAL_SMBUS_CR2) & BAIKAL_CR2_FTE) &&
+			       !(readl(smbus->base + BAIKAL_SMBUS_ISR1) &
+				(BAIKAL_ISR1_TCS | BAIKAL_ISR1_ALD | BAIKAL_ISR1_RNK)))
+				;
+
+			if (readl(smbus->base + BAIKAL_SMBUS_ISR1) &
+				(BAIKAL_ISR1_ALD | BAIKAL_ISR1_RNK)) {
+				ret = -ENXIO;
+				goto exit;
+			}
+
+			data->byte = readl(smbus->base + BAIKAL_SMBUS_FIFO);
+			if (readl(smbus->base + BAIKAL_SMBUS_ISR1) & BAIKAL_ISR1_FER) {
+				ret = -EMSGSIZE;
+				goto exit;
+			}
+		}
+
+		ret = 0;
+		break;
+	case I2C_SMBUS_WORD_DATA:
+		writel(BAIKAL_CR1_IRT, smbus->base + BAIKAL_SMBUS_CR1);
+		writel(0, smbus->base + BAIKAL_SMBUS_CR1);
+		writel(addr, smbus->base + BAIKAL_SMBUS_ADR1);
+		writel(BAIKAL_CR1_TRS | BAIKAL_CR1_IEB, smbus->base + BAIKAL_SMBUS_CR1);
+
+		if (read_write == I2C_SMBUS_WRITE) {
+			writel(3, smbus->base + BAIKAL_SMBUS_FBCR1);
+			writel(command, smbus->base + BAIKAL_SMBUS_FIFO);
+			writel((data->word >> 0) & 0xff,
+			       smbus->base + BAIKAL_SMBUS_FIFO);
+			writel((data->word >> 8) & 0xff,
+			       smbus->base + BAIKAL_SMBUS_FIFO);
+		} else {
+			writel(1, smbus->base + BAIKAL_SMBUS_FBCR1);
+			writel(command, smbus->base + BAIKAL_SMBUS_FIFO);
+		}
+
+		writel(BAIKAL_ISR1_MASK, smbus->base + BAIKAL_SMBUS_ISR1);
+		writel(BAIKAL_CR2_FTE, smbus->base + BAIKAL_SMBUS_CR2);
+
+		while ((readl(smbus->base + BAIKAL_SMBUS_CR2) & BAIKAL_CR2_FTE) &&
+		       !(readl(smbus->base + BAIKAL_SMBUS_ISR1) &
+			(BAIKAL_ISR1_TCS | BAIKAL_ISR1_ALD | BAIKAL_ISR1_RNK)))
+			;
+
+		if (readl(smbus->base + BAIKAL_SMBUS_ISR1) &
+			(BAIKAL_ISR1_ALD | BAIKAL_ISR1_RNK)) {
+			ret = -ENXIO;
+			goto exit;
+		}
+
+		if (read_write == I2C_SMBUS_READ) {
+			writel(BAIKAL_CR1_IEB, smbus->base + BAIKAL_SMBUS_CR1);
+			writel(2, smbus->base + BAIKAL_SMBUS_FBCR1);
+			writel(BAIKAL_ISR1_MASK, smbus->base + BAIKAL_SMBUS_ISR1);
+			writel(BAIKAL_CR2_FTE, smbus->base + BAIKAL_SMBUS_CR2);
+
+			while ((readl(smbus->base + BAIKAL_SMBUS_CR2) & BAIKAL_CR2_FTE) &&
+			       !(readl(smbus->base + BAIKAL_SMBUS_ISR1) &
+				(BAIKAL_ISR1_TCS | BAIKAL_ISR1_ALD | BAIKAL_ISR1_RNK)))
+				;
+
+			if (readl(smbus->base + BAIKAL_SMBUS_ISR1) &
+				(BAIKAL_ISR1_ALD | BAIKAL_ISR1_RNK)) {
+				ret = -ENXIO;
+				goto exit;
+			}
+
+			data->word  = readl(smbus->base + BAIKAL_SMBUS_FIFO);
+			data->word |= readl(smbus->base + BAIKAL_SMBUS_FIFO) << 8;
+			if (readl(smbus->base + BAIKAL_SMBUS_ISR1) & BAIKAL_ISR1_FER) {
+				ret = -EMSGSIZE;
+				goto exit;
+			}
+		}
+
+		ret = 0;
+		break;
+	case I2C_SMBUS_BLOCK_DATA:
+		writel(BAIKAL_CR1_IRT, smbus->base + BAIKAL_SMBUS_CR1);
+		writel(0, smbus->base + BAIKAL_SMBUS_CR1);
+		writel(addr, smbus->base + BAIKAL_SMBUS_ADR1);
+		writel(BAIKAL_CR1_TRS | BAIKAL_CR1_IEB, smbus->base + BAIKAL_SMBUS_CR1);
+
+		if (read_write == I2C_SMBUS_WRITE) {
+			unsigned int txedsize = 0;
+
+			while (txedsize < data->block[0] + 1) {
+				unsigned int xblocknum;
+				unsigned int xfersize;
+
+				xfersize = data->block[0] + 1 - txedsize;
+				if (xfersize > BAIKAL_SMBUS_FIFO_SIZE)
+					xfersize = BAIKAL_SMBUS_FIFO_SIZE;
+
+				writel(xfersize, smbus->base + BAIKAL_SMBUS_FBCR1);
+				if (!txedsize) {
+					/*
+					 * The first xfer should start with
+					 * command byte
+					 */
+					writel(command,
+					       smbus->base + BAIKAL_SMBUS_FIFO);
+					xblocknum = 1;
+					++txedsize;
+				} else {
+					xblocknum = 0;
+				}
+
+				while (xblocknum < xfersize) {
+					writel(data->block[txedsize++],
+					       smbus->base + BAIKAL_SMBUS_FIFO);
+					++xblocknum;
+				}
+
+				writel(BAIKAL_ISR1_MASK, smbus->base + BAIKAL_SMBUS_ISR1);
+				writel(BAIKAL_CR2_FTE, smbus->base + BAIKAL_SMBUS_CR2);
+
+				while ((readl(smbus->base + BAIKAL_SMBUS_CR2) & BAIKAL_CR2_FTE) &&
+				       !(readl(smbus->base + BAIKAL_SMBUS_ISR1) &
+					(BAIKAL_ISR1_TCS | BAIKAL_ISR1_ALD | BAIKAL_ISR1_RNK)))
+					;
+
+				if (readl(smbus->base + BAIKAL_SMBUS_ISR1) &
+					(BAIKAL_ISR1_ALD | BAIKAL_ISR1_RNK)) {
+					ret = -ENXIO;
+					goto exit;
+				}
+			}
+		} else {
+			unsigned int rxedsize = 0;
+
+			writel(1, smbus->base + BAIKAL_SMBUS_FBCR1);
+			writel(command, smbus->base + BAIKAL_SMBUS_FIFO);
+			writel(BAIKAL_ISR1_MASK, smbus->base + BAIKAL_SMBUS_ISR1);
+			writel(BAIKAL_CR2_FTE, smbus->base + BAIKAL_SMBUS_CR2);
+
+			while ((readl(smbus->base + BAIKAL_SMBUS_CR2) & BAIKAL_CR2_FTE) &&
+			       !(readl(smbus->base + BAIKAL_SMBUS_ISR1) &
+				(BAIKAL_ISR1_TCS | BAIKAL_ISR1_ALD | BAIKAL_ISR1_RNK)))
+				;
+
+			if (readl(smbus->base + BAIKAL_SMBUS_ISR1) &
+				(BAIKAL_ISR1_ALD | BAIKAL_ISR1_RNK)) {
+				ret = -ENXIO;
+				goto exit;
+			}
+
+			writel(BAIKAL_CR1_IEB, smbus->base + BAIKAL_SMBUS_CR1);
+
+			while (rxedsize < I2C_SMBUS_BLOCK_MAX) {
+				unsigned int i;
+
+				writel(BAIKAL_SMBUS_FIFO_SIZE, smbus->base + BAIKAL_SMBUS_FBCR1);
+				writel(BAIKAL_ISR1_MASK, smbus->base + BAIKAL_SMBUS_ISR1);
+				writel(BAIKAL_CR2_FTE, smbus->base + BAIKAL_SMBUS_CR2);
+
+				while ((readl(smbus->base + BAIKAL_SMBUS_CR2) & BAIKAL_CR2_FTE) &&
+				       !(readl(smbus->base + BAIKAL_SMBUS_ISR1) &
+					(BAIKAL_ISR1_TCS | BAIKAL_ISR1_ALD | BAIKAL_ISR1_RNK)))
+					;
+
+				if (readl(smbus->base + BAIKAL_SMBUS_ISR1) &
+					(BAIKAL_ISR1_ALD | BAIKAL_ISR1_RNK)) {
+					ret = -ENXIO;
+					goto exit;
+				}
+
+				for (i = 0; i < BAIKAL_SMBUS_FIFO_SIZE; ++i) {
+					data->block[1 + rxedsize++] =
+						readl(smbus->base + BAIKAL_SMBUS_FIFO);
+
+					if (readl(smbus->base + BAIKAL_SMBUS_ISR1) &
+							BAIKAL_ISR1_FER) {
+						ret = -EMSGSIZE;
+						goto exit;
+					}
+				}
+			}
+
+			data->block[0] = rxedsize;
+		}
+
+		ret = 0;
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+exit:
+	writel(0, smbus->base + BAIKAL_SMBUS_CR1);
+	return ret;
+}
+
+static u32 baikal_functionality(struct i2c_adapter *adapter)
+{
+	return I2C_FUNC_SMBUS_BYTE | I2C_FUNC_SMBUS_BYTE_DATA |
+	       I2C_FUNC_SMBUS_WORD_DATA | I2C_FUNC_SMBUS_BLOCK_DATA;
+}
+
+static const struct i2c_algorithm baikal_smbus_algorithm = {
+	.smbus_xfer	= baikal_smbus_xfer,
+	.functionality	= baikal_functionality
+};
+
+static irqreturn_t baikal_smbus_isr(int irq, void *_dev)
+{
+	return IRQ_HANDLED;
+}
+
+static int baikal_smbus_probe(struct platform_device *pdev)
+{
+	struct baikal_smbus_dev *smbus;
+	void __iomem *base;
+	int irq;
+	int ret;
+
+	smbus = devm_kzalloc(&pdev->dev, sizeof(*smbus), GFP_KERNEL);
+	if (!smbus)
+		return -ENOMEM;
+
+	base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "missing interrupt resource\n");
+		return irq;
+	}
+
+	if (dev_of_node(&pdev->dev)) {
+		smbus->smbus_clk = devm_clk_get(&pdev->dev, NULL);
+		if (IS_ERR(smbus->smbus_clk)) {
+			dev_err(&pdev->dev, "missing clock\n");
+			return PTR_ERR(smbus->smbus_clk);
+		}
+
+		ret = clk_prepare_enable(smbus->smbus_clk);
+		if (ret) {
+			dev_err(&pdev->dev, "failed to enable clock\n");
+			return ret;
+		}
+
+		smbus->smbus_clk_rate = 0;
+#ifdef CONFIG_ACPI
+	} else if (to_acpi_device_node(pdev->dev.fwnode) &&
+		   acpi_evaluate_integer(to_acpi_device_node(pdev->dev.fwnode)->handle,
+					 "CLK", NULL, &smbus->smbus_clk_rate)) {
+		dev_err(&pdev->dev, "missing clock-frequency value\n");
+		return -EINVAL;
+#endif
+	}
+
+	smbus->base = base;
+	smbus->dev = &pdev->dev;
+
+	device_property_read_u32(&pdev->dev, "clock-frequency",
+				 &smbus->bus_clk_rate);
+	if (!smbus->bus_clk_rate)
+		smbus->bus_clk_rate = 100000; /* default clock rate */
+
+	ret = devm_request_irq(&pdev->dev, irq, baikal_smbus_isr, 0,
+			       pdev->name, smbus);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to claim IRQ%d\n", irq);
+		return ret;
+	}
+
+	i2c_set_adapdata(&smbus->adapter, smbus);
+	strscpy(smbus->adapter.name, pdev->name, sizeof(smbus->adapter.name));
+	smbus->adapter.owner = THIS_MODULE;
+	smbus->adapter.algo = &baikal_smbus_algorithm;
+	smbus->adapter.dev.parent = &pdev->dev;
+	if (dev_of_node(&pdev->dev))
+		smbus->adapter.dev.of_node = pdev->dev.of_node;
+	else
+		smbus->adapter.dev.fwnode = pdev->dev.fwnode;
+
+	platform_set_drvdata(pdev, smbus);
+
+	ret = baikal_smbus_init(smbus);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to init SMBus\n");
+		goto err_clk;
+	}
+
+	ret = i2c_add_adapter(&smbus->adapter);
+	if (ret)
+		goto err_clk;
+
+	return 0;
+
+err_clk:
+	if (dev_of_node(&pdev->dev))
+		clk_disable_unprepare(smbus->smbus_clk);
+
+	return ret;
+}
+
+static int baikal_smbus_remove(struct platform_device *pdev)
+{
+	struct baikal_smbus_dev *smbus = platform_get_drvdata(pdev);
+
+	clk_disable_unprepare(smbus->smbus_clk);
+	i2c_del_adapter(&smbus->adapter);
+	return 0;
+}
+
+static const struct of_device_id baikal_smbus_match[] = {
+	{ .compatible = "baikal,bm1000-smbus" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, baikal_smbus_match);
+
+static struct platform_driver baikal_smbus_driver = {
+	.driver = {
+		.name = "baikal-smbus",
+		.of_match_table = baikal_smbus_match
+	},
+	.probe  = baikal_smbus_probe,
+	.remove = baikal_smbus_remove
+};
+
+module_platform_driver(baikal_smbus_driver);
+
+MODULE_AUTHOR("Georgy Vlasov <georgy.vlasov@baikalelectronics.ru>");
+MODULE_DESCRIPTION("Baikal SMBus driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index c55b63750757d..779dade982847 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -466,6 +466,23 @@ config PVPANIC
 	  a paravirtualized device provided by QEMU; it lets a virtual machine
 	  (guest) communicate panic events to the host.
 
+config TP_BMC
+	tristate "T-Platforms Baikal-T(1)/M BMC"
+	depends on I2C && SYSFS
+	depends on OF
+	select PINCTRL
+	select GENERIC_PINCONF
+	select SERIO
+	help
+	  Say Y here if you want to build a driver for T-Platforms BMC devices
+	  embedded into the boards with Baikal-T(1)/M processors. The device main
+	  purpose is the CPU kick-starting as well as some additional side-way
+	  functionality like power on/off buttons state tracing and full device
+	  powering off.
+
+	  If you choose to build module, its name will be tp-bt-bmc. If unsure,
+	  say N here.
+
 source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/eeprom/Kconfig"
 source "drivers/misc/cb710/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index c1860d35dc7e2..09c78743e513e 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -57,3 +57,4 @@ obj-y				+= cardreader/
 obj-$(CONFIG_PVPANIC)   	+= pvpanic.o
 obj-$(CONFIG_HABANA_AI)		+= habanalabs/
 obj-$(CONFIG_XILINX_SDFEC)	+= xilinx_sdfec.o
+obj-$(CONFIG_TP_BMC)		+= tp_bmc.o
diff --git a/drivers/misc/tp_bmc.c b/drivers/misc/tp_bmc.c
new file mode 100644
index 0000000000000..821cb96a9dbef
--- /dev/null
+++ b/drivers/misc/tp_bmc.c
@@ -0,0 +1,771 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/i2c.h>
+#include <linux/bcd.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/input.h>
+#include <linux/regmap.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/pm.h>
+#include <linux/rtc.h>
+#include <linux/serio.h>
+#include <linux/platform_device.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinconf.h>
+#include <linux/pinctrl/pinconf-generic.h>
+
+enum I2C_REGS {
+	R_ID1 = 0,
+	R_ID2,
+	R_ID3,
+	R_ID4,
+	R_SOFTOFF_RQ,
+	R_PWROFF_RQ,
+	R_PWRBTN_STATE,
+	R_VERSION1,
+	R_VERSION2,
+	R_BOOTREASON,
+	R_BOOTREASON_ARG,
+	R_SCRATCH1,
+	R_SCRATCH2,
+	R_SCRATCH3,
+	R_SCRATCH4,
+	R_CAP,
+	R_GPIODIR0,
+	R_GPIODIR1,
+	R_GPIODIR2,
+	R_COUNT
+};
+
+#define BMC_ID1_VAL		0x49
+#define BMC_ID2_VAL		0x54
+#define BMC_ID3_VAL		0x58
+#define BMC_ID4_VAL0		0x32
+#define BMC_ID4_VAL1		0x2
+
+#define BMC_VERSION1		0
+#define BMC_VERSION2		2
+#define BMC_VERSION2_3		3
+
+#define BMC_CAP_PWRBTN		0x1
+#define BMC_CAP_TOUCHPAD	0x2
+#define BMC_CAP_RTC		0x4
+#define BMC_CAP_FRU		0x8
+#define BMC_CAP_GPIODIR		0x10
+
+#define BMC_SERIO_BUFSIZE	7
+
+struct bmc_poll_data {
+	struct i2c_client *c;
+};
+
+static struct i2c_client	*bmc_i2c;
+static struct i2c_client	*rtc_i2c;
+static struct i2c_driver	mitx2_bmc_i2c_driver;
+static struct input_dev		*button_dev;
+static struct bmc_poll_data	poll_data;
+static struct task_struct	*polling_task;
+#ifdef CONFIG_SERIO
+static struct i2c_client	*serio_i2c;
+static struct task_struct	*touchpad_task;
+#endif
+static u8 bmc_proto_version[3];
+static u8 bmc_bootreason[2];
+static u8 bmc_scratch[4];
+static int bmc_cap;
+static const char input_name[] = "BMC input dev";
+static u8 prev_softoff_ret;
+
+static int bmc_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	uint8_t rtc_buf[8];
+	struct i2c_msg msg;
+	int t;
+	int rc;
+
+	msg.addr = client->addr;
+	msg.flags = I2C_M_RD;
+	msg.len = 8;
+	msg.buf = rtc_buf;
+	rc = i2c_transfer(client->adapter, &msg, 1);
+	if (rc != 1) {
+		dev_err(dev, "rtc_read_time: i2c_transfer error %d\n", rc);
+		return rc;
+	}
+
+	tm->tm_sec  = bcd2bin(rtc_buf[0] & 0x7f);
+	tm->tm_min  = bcd2bin(rtc_buf[1] & 0x7f);
+	tm->tm_hour = bcd2bin(rtc_buf[2] & 0x3f);
+	if (rtc_buf[3] & (1 << 6)) /* PM */
+		tm->tm_hour += 12;
+
+	tm->tm_mday = bcd2bin(rtc_buf[4] & 0x3f);
+	tm->tm_mon  = bcd2bin(rtc_buf[5] & 0x1f);
+	t = rtc_buf[5] >> 5;
+	tm->tm_wday = (t == 7) ? 0 : t;
+	tm->tm_year = bcd2bin(rtc_buf[6]) + 100; /* year since 1900 */
+	tm->tm_yday = rtc_year_days(tm->tm_mday, tm->tm_mon, tm->tm_year);
+	tm->tm_isdst = 0;
+
+	return rtc_valid_tm(tm);
+}
+
+static int bmc_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	uint8_t rtc_buf[8];
+	struct i2c_msg msg;
+	int rc;
+	uint8_t seconds, minutes, hours, wday, mday, month, years;
+
+	seconds	= bin2bcd(tm->tm_sec);
+	minutes	= bin2bcd(tm->tm_min);
+	hours	= bin2bcd(tm->tm_hour);
+	wday	= tm->tm_wday ? tm->tm_wday : 0x7;
+	mday	= bin2bcd(tm->tm_mday);
+	month	= bin2bcd(tm->tm_mon);
+	years	= bin2bcd(tm->tm_year % 100);
+
+	/* TODO: need sanity check? */
+	rtc_buf[0] = seconds;
+	rtc_buf[1] = minutes;
+	rtc_buf[2] = hours;
+	rtc_buf[3] = 0;
+	rtc_buf[4] = mday;
+	rtc_buf[5] = month | (wday << 5);
+	rtc_buf[6] = years;
+	rtc_buf[7] = 0;
+
+	msg.addr = client->addr;
+	msg.flags = 0;
+	msg.len = 8;
+	msg.buf = rtc_buf;
+
+	dev_dbg(dev, "rtc_set_time: %08x-%08x\n",
+		*(uint32_t *)&rtc_buf[0],
+		*(uint32_t *)&rtc_buf[4]);
+
+	rc = i2c_transfer(client->adapter, &msg, 1);
+	if (rc != 1)
+		dev_err(dev, "i2c write: %d\n", rc);
+
+	return rc == 1 ? 0 : -EIO;
+}
+
+static const struct rtc_class_ops bmc_rtc_ops = {
+	.read_time = bmc_rtc_read_time,
+	.set_time  = bmc_rtc_set_time,
+};
+
+#ifdef CONFIG_SERIO
+/* BMC serio (PS/2 touchpad) interface */
+
+static int bmc_serio_write(struct serio *id, unsigned char val)
+{
+	struct i2c_client *client = id->port_data;
+	uint8_t buf[4];
+	struct i2c_msg msg;
+	int rc;
+
+	buf[0] = val;
+	msg.addr = client->addr;
+	msg.flags = 0;
+	msg.len = 1;
+	msg.buf = buf;
+	dev_dbg(&client->dev, "bmc_serio_write: 0x%02x\n", val);
+	rc = i2c_transfer(client->adapter, &msg, 1);
+	if (rc != 1)
+		dev_err(&client->dev, "i2c write: %d\n", rc);
+
+	return rc == 1 ? 0 : -EIO;
+}
+
+/* returns: -1 on error, +1 if more data available, 0 otherwise */
+static int bmc_serio_read(struct i2c_client *client)
+{
+	struct serio *serio = dev_get_drvdata(&client->dev);
+	int i, rc, cnt;
+	uint8_t buf[BMC_SERIO_BUFSIZE];
+	struct i2c_msg msg;
+
+	msg.addr = client->addr;
+	msg.flags = I2C_M_RD;
+	msg.len = BMC_SERIO_BUFSIZE;
+	msg.buf = buf;
+	rc = i2c_transfer(client->adapter, &msg, 1);
+	if (rc != 1) {
+		dev_err(&client->dev,
+			"bmc_serio_read: i2c_transfer error %d\n", rc);
+		return -1;
+	}
+
+	cnt = buf[0];
+	rc = 0;
+	if (cnt > BMC_SERIO_BUFSIZE - 1) {
+		cnt = BMC_SERIO_BUFSIZE - 1;
+		rc = 1;
+	}
+
+	for (i = 0; i < cnt; i++)
+		serio_interrupt(serio, buf[i + 1], 0);
+
+	return 0;
+}
+
+int touchpad_poll_fn(void *data)
+{
+	int ret;
+
+	while (1) {
+		if (kthread_should_stop())
+			break;
+
+		while ((ret = bmc_serio_read(serio_i2c)) > 0)
+			;
+
+		if (ret < 0)
+			msleep_interruptible(10000);
+
+		msleep_interruptible(10);
+	}
+
+	return 0;
+}
+#endif /* CONFIG_SERIO */
+
+#ifdef CONFIG_PINCTRL
+static uint8_t bmc_pincf_state [3];
+#define BMC_NPINS	(sizeof(bmc_pincf_state) * 8)
+
+static struct pinctrl_pin_desc bmc_pin_desc[BMC_NPINS] = {
+	PINCTRL_PIN(0, "P0"),
+	PINCTRL_PIN(1, "P1"),
+	PINCTRL_PIN(2, "P2"),
+	PINCTRL_PIN(3, "P3"),
+	PINCTRL_PIN(4, "P4"),
+	PINCTRL_PIN(5, "P5"),
+	PINCTRL_PIN(6, "P6"),
+	PINCTRL_PIN(7, "P7"),
+	PINCTRL_PIN(8, "P8"),
+	PINCTRL_PIN(9, "P9"),
+	PINCTRL_PIN(10, "P10"),
+	PINCTRL_PIN(11, "P11"),
+	PINCTRL_PIN(12, "P12"),
+	PINCTRL_PIN(13, "P13"),
+	PINCTRL_PIN(14, "P14"),
+	PINCTRL_PIN(15, "P15"),
+	PINCTRL_PIN(16, "P16"),
+	PINCTRL_PIN(17, "P17"),
+	PINCTRL_PIN(18, "P18"),
+	PINCTRL_PIN(19, "P19"),
+	PINCTRL_PIN(20, "P20"),
+	PINCTRL_PIN(21, "P21"),
+	PINCTRL_PIN(22, "P22"),
+	PINCTRL_PIN(23, "P23"),
+};
+
+#define PCTRL_DEV	"bmc_pinctrl"
+
+static int bmc_pin_config_get(struct pinctrl_dev *pctldev,
+			      unsigned pin,
+			      unsigned long *config)
+{
+	int idx, bit;
+
+	if (pin > BMC_NPINS)
+		return -EINVAL;
+
+	idx = pin >> 3;
+	bit = pin & 7;
+
+	*config = !!(bmc_pincf_state[idx] & (1 << bit));
+	return 0;
+}
+
+static int bmc_pin_config_set(struct pinctrl_dev *pctldev,
+			      unsigned pin,
+			      unsigned long *config,
+			      unsigned nc)
+{
+	int idx, bit;
+	enum pin_config_param param;
+	int arg;
+
+	if (pin > BMC_NPINS)
+		return -EINVAL;
+
+	idx = pin >> 3;
+	bit = pin & 7;
+
+	param = pinconf_to_config_param (*config);
+	arg = pinconf_to_config_argument (*config);
+	if (param != PIN_CONFIG_OUTPUT)
+		return -EINVAL;
+
+	if (arg)
+		bmc_pincf_state[idx] |= (1 << bit);
+	else
+		bmc_pincf_state[idx] &= ~(1 << bit);
+
+	dev_dbg(&bmc_i2c->dev,
+		"bmc_pin_config_set: pin %u, dir %lu\n", pin, *config);
+
+	return i2c_smbus_write_byte_data(bmc_i2c, R_GPIODIR0 + idx,
+					 bmc_pincf_state[idx]);
+}
+
+void pinconf_generic_dump_config(struct pinctrl_dev *pctldev,
+				 struct seq_file *s, unsigned long config);
+
+void pinctrl_utils_free_map(struct pinctrl_dev *pctldev,
+			    struct pinctrl_map *map, unsigned num_maps);
+
+static const struct pinconf_ops bmc_confops = {
+	.pin_config_get = bmc_pin_config_get,
+	.pin_config_set = bmc_pin_config_set,
+	.pin_config_config_dbg_show = pinconf_generic_dump_config,
+};
+
+static int bmc_groups_count(struct pinctrl_dev *pctldev)
+{
+	return 0;
+}
+
+static const char *bmc_group_name(struct pinctrl_dev *pctldev,
+				  unsigned selector)
+{
+	return NULL;
+}
+
+static const struct pinctrl_ops bmc_ctrl_ops = {
+	.get_groups_count = bmc_groups_count,
+	.get_group_name = bmc_group_name,
+	.dt_node_to_map = pinconf_generic_dt_node_to_map_pin,
+	.dt_free_map = pinctrl_utils_free_map,
+};
+
+static struct pinctrl_desc bmc_pincrtl_desc = {
+	.name = PCTRL_DEV,
+	.pins = bmc_pin_desc,
+	.pctlops = &bmc_ctrl_ops,
+	.npins = BMC_NPINS,
+	.confops = &bmc_confops,
+};
+
+static struct pinctrl_dev *bmc_pinctrl_dev;
+
+static int bmc_pinctrl_register(struct device *dev)
+{
+	struct pinctrl_dev *pctrl_dev;
+	struct platform_device *pbdev;
+
+	pbdev = platform_device_alloc(PCTRL_DEV, -1);
+	pbdev->dev.parent = dev;
+	pbdev->dev.of_node = of_find_node_by_name(dev->of_node, "bmc_pinctrl");
+	platform_device_add(pbdev);
+	pctrl_dev = devm_pinctrl_register(&pbdev->dev, &bmc_pincrtl_desc, NULL);
+	if (IS_ERR(pctrl_dev)) {
+		dev_err(&pbdev->dev,
+			"Can't register pinctrl (%ld)\n", PTR_ERR(pctrl_dev));
+		return PTR_ERR(pctrl_dev);
+	} else {
+		dev_dbg(&pbdev->dev, "BMC pinctrl registered\n");
+		bmc_pinctrl_dev = pctrl_dev;
+	}
+	/* reset all pins to default state */
+	i2c_smbus_write_byte_data(to_i2c_client(dev), R_GPIODIR0, 0);
+	i2c_smbus_write_byte_data(to_i2c_client(dev), R_GPIODIR1, 0);
+	i2c_smbus_write_byte_data(to_i2c_client(dev), R_GPIODIR2, 0);
+	return 0;
+}
+
+static void bmc_pinctrl_unregister(void)
+{
+	if (bmc_pinctrl_dev)
+		devm_pinctrl_unregister(&bmc_i2c->dev, bmc_pinctrl_dev);
+}
+#endif
+
+void bmc_pwroff_rq(void)
+{
+	int ret;
+
+	dev_info(&bmc_i2c->dev, "Write reg R_PWROFF_RQ\n");
+	ret = i2c_smbus_write_byte_data(bmc_i2c, R_PWROFF_RQ, 0x01);
+	dev_info(&bmc_i2c->dev, "ret: %i\n", ret);
+}
+
+int pwroff_rq_poll_fn(void *data)
+{
+	int ret;
+
+	while (1) {
+		if (kthread_should_stop())
+			break;
+
+		dev_dbg(&poll_data.c->dev, "Polling\n");
+		ret = i2c_smbus_read_byte_data(poll_data.c, R_SOFTOFF_RQ);
+		dev_dbg(&poll_data.c->dev, "Polling returned: %i\n", ret);
+
+		if (ret < 0) {
+			dev_err(&poll_data.c->dev,
+				"Could not read register 0x%x\n",
+				R_SOFTOFF_RQ);
+			return -EIO;
+		}
+
+		if (prev_softoff_ret != ret) {
+			dev_info(&poll_data.c->dev, "key change [%i]\n", ret);
+			if (ret != 0) {
+				dev_info(&poll_data.c->dev,
+					 "PWROFF \"irq\" detected [%i]\n", ret);
+				input_event(button_dev, EV_KEY, KEY_POWER, 1);
+			} else {
+				input_event(button_dev, EV_KEY, KEY_POWER, 0);
+			}
+
+			input_sync(button_dev);
+			prev_softoff_ret = ret;
+		}
+
+		msleep_interruptible(100);
+	}
+
+	do_exit(1);
+	return 0;
+}
+
+static int mitx2_bmc_validate(struct i2c_client *client)
+{
+	int ret = 0;
+	int i = 0;
+	static const u8 regs[] = {R_ID1, R_ID2, R_ID3};
+	static const u8 vals[] = {BMC_ID1_VAL, BMC_ID2_VAL, BMC_ID3_VAL};
+
+	bmc_proto_version[0] = 0;
+	bmc_proto_version[1] = 0;
+	bmc_proto_version[2] = 0;
+
+	for (i = 0; i < ARRAY_SIZE(regs); i++) {
+		ret = i2c_smbus_read_byte_data(client, regs[i]);
+		if (ret < 0) {
+			dev_err(&client->dev, "Could not read register 0x%x\n",
+				regs[i]);
+			return -EIO;
+		}
+
+		if (ret != vals[i]) {
+			dev_err(&client->dev,
+				"Bad value [0x%02x] in register 0x%x, should be [0x%02x]\n",
+				ret, regs[i], vals[i]);
+			return -ENODEV;
+		}
+	}
+
+	ret = i2c_smbus_read_byte_data(client, R_ID4);
+	if (ret < 0) {
+		dev_err(&client->dev, "Could not read register 0x%x\n", R_ID4);
+		return -EIO;
+	}
+
+	if (ret == BMC_ID4_VAL0) {
+		bmc_proto_version[0] = 0;
+	} else if (ret == BMC_ID4_VAL1) {
+		bmc_proto_version[0] = 2;
+		ret = i2c_smbus_read_byte_data(client, R_VERSION1);
+		if (ret < 0) {
+			dev_err(&client->dev, "Could not read register 0x%x\n",
+				R_VERSION1);
+			return -EIO;
+		}
+
+		bmc_proto_version[1] = ret;
+
+		ret = i2c_smbus_read_byte_data(client, R_VERSION2);
+		if (ret < 0) {
+			dev_err(&client->dev, "Could not read register 0x%x\n",
+				R_VERSION2);
+			return -EIO;
+		}
+
+		bmc_proto_version[2] = ret;
+
+		ret = i2c_smbus_read_byte_data(client, R_BOOTREASON);
+		if (ret < 0) {
+			dev_err(&client->dev, "Could not read register 0x%x\n",
+				R_BOOTREASON);
+			return -EIO;
+		}
+
+		bmc_bootreason[0] = ret;
+
+		ret = i2c_smbus_read_byte_data(client, R_BOOTREASON_ARG);
+		if (ret < 0) {
+			dev_err(&client->dev, "Could not read register 0x%x\n",
+				R_BOOTREASON_ARG);
+			return -EIO;
+		}
+
+		bmc_bootreason[1] = ret;
+
+		for (i = R_SCRATCH1; i <= R_SCRATCH4; i++) {
+			ret = i2c_smbus_read_byte_data(client, i);
+			if (ret < 0) {
+				dev_err(&client->dev,
+					"Could not read register 0x%x\n", i);
+				return -EIO;
+			}
+
+			bmc_scratch[i - R_SCRATCH1] = ret;
+		}
+
+		if (bmc_proto_version[2] >= BMC_VERSION2_3) {
+			ret = i2c_smbus_read_byte_data(client, R_CAP);
+			if (ret >= 0)
+				bmc_cap = ret;
+		} else {
+			bmc_cap = BMC_CAP_PWRBTN;
+		}
+
+		dev_info(&client->dev,
+			 "bootreason %i:%i, extcap 0x%x\n",
+			 bmc_bootreason[0],
+			 bmc_bootreason[1],
+			 bmc_cap);
+	} else {
+		dev_err(&client->dev, "Bad value [0x%02x] in register 0x%x\n",
+			ret, R_ID4);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int bmc_create_client_devices(struct device *bmc_dev)
+{
+	int ret = 0;
+	struct rtc_device *rtc_dev;
+	struct i2c_client *client = to_i2c_client(bmc_dev);
+	int client_addr = client->addr + 1;
+
+	if (bmc_cap & BMC_CAP_TOUCHPAD) {
+#ifdef CONFIG_SERIO
+		struct serio *serio;
+		serio_i2c = i2c_new_ancillary_device(client,
+						     "bmc_serio", client_addr);
+		if (IS_ERR(serio_i2c)) {
+			dev_err(&client->dev, "Can't get serio secondary\n");
+			serio_i2c = NULL;
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		serio = devm_kzalloc(&serio_i2c->dev,
+				     sizeof(struct serio), GFP_KERNEL);
+		if (!serio) {
+			dev_err(&serio_i2c->dev, "Can't allocate serio\n");
+			ret = -ENOMEM;
+			i2c_unregister_device(serio_i2c);
+			serio_i2c = NULL;
+			goto skip_tp;
+		}
+
+		serio->write = bmc_serio_write;
+		serio->port_data = serio_i2c;
+		serio->id.type = SERIO_PS_PSTHRU;
+		serio_register_port(serio);
+		dev_set_drvdata(&serio_i2c->dev, serio);
+		touchpad_task = kthread_run(touchpad_poll_fn, NULL,
+					    "BMC serio poll task");
+skip_tp:
+#endif
+		client_addr++;
+	}
+
+	if (bmc_cap & BMC_CAP_RTC) {
+		rtc_i2c = i2c_new_ancillary_device(client,
+						   "bmc_rtc", client_addr);
+		if (IS_ERR(rtc_i2c)) {
+			dev_err(&client->dev, "Can't get RTC secondary\n");
+			rtc_i2c = NULL;
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		rtc_dev = devm_rtc_device_register(&rtc_i2c->dev, "bmc_rtc",
+						   &bmc_rtc_ops, THIS_MODULE);
+		if (IS_ERR(rtc_dev)) {
+			ret = PTR_ERR(rtc_dev);
+			dev_err(&client->dev,
+				"Failed to register RTC device: %d\n", ret);
+			i2c_unregister_device(rtc_i2c);
+			rtc_i2c = NULL;
+		}
+fail:
+		client_addr++;
+	}
+#ifdef CONFIG_PINCTRL
+	if (bmc_cap & BMC_CAP_GPIODIR || 1 /*vvv*/)
+		bmc_pinctrl_register(bmc_dev);
+#endif
+	return ret;
+}
+
+static int mitx2_bmc_i2c_probe(struct i2c_client *client,
+			       const struct i2c_device_id *id)
+{
+	int err = 0;
+	int i = 0;
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+		return -ENODEV;
+
+	for (i = 0; i < 10; i++) {
+		err = mitx2_bmc_validate(client);
+		if (!err)
+			break;
+
+		msleep_interruptible(20);
+	}
+
+	if (err)
+		return err;
+
+	if (bmc_cap & BMC_CAP_PWRBTN) {
+		button_dev = input_allocate_device();
+		if (!button_dev) {
+			dev_err(&client->dev, "Not enough memory\n");
+			return -ENOMEM;
+		}
+
+		button_dev->id.bustype = BUS_I2C;
+		button_dev->dev.parent = &client->dev;
+		button_dev->name = input_name;
+		button_dev->phys = "bmc-input0";
+		button_dev->evbit[0] = BIT_MASK(EV_KEY);
+		button_dev->keybit[BIT_WORD(KEY_POWER)] = BIT_MASK(KEY_POWER);
+
+		err = input_register_device(button_dev);
+		if (err) {
+			dev_err(&client->dev, "Failed to register device\n");
+			input_free_device(button_dev);
+			return err;
+		}
+
+		dev_dbg(&client->dev, "Starting polling thread\n");
+		poll_data.c = client;
+		polling_task = kthread_run(pwroff_rq_poll_fn, NULL,
+					   "BMC poll task");
+	}
+
+	if (bmc_cap || 1 /*vvv*/)
+		err = bmc_create_client_devices(&client->dev);
+
+	bmc_i2c = client;
+	/* register as poweroff handler */
+	pm_power_off = bmc_pwroff_rq;
+
+	return 0;
+}
+
+static int mitx2_bmc_i2c_remove(struct i2c_client *client)
+{
+#ifdef CONFIG_SERIO
+	struct serio *serio;
+#endif
+	if (button_dev) {
+		kthread_stop(polling_task);
+		input_unregister_device(button_dev);
+	}
+#ifdef CONFIG_SERIO
+	if (serio_i2c) {
+		kthread_stop(touchpad_task);
+		serio = dev_get_drvdata(&serio_i2c->dev);
+		serio_unregister_port(serio);
+		i2c_unregister_device(serio_i2c);
+	}
+#endif
+	if (rtc_i2c)
+		i2c_unregister_device(rtc_i2c);
+#ifdef CONFIG_PINCTRL
+	bmc_pinctrl_unregister();
+#endif
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id mitx2_bmc_of_match[] = {
+	{ .compatible = "tp,mitx2-bmc" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, mitx2_bmc_of_match);
+#endif
+
+static const struct i2c_device_id mitx2_bmc_i2c_id[] = {
+	{ "mitx2_bmc", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, mitx2_bmc_i2c_id);
+
+static ssize_t version_show(struct kobject *kobj,
+			    struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%i.%i.%i\n", bmc_proto_version[0],
+					  bmc_proto_version[1],
+					  bmc_proto_version[2]);
+}
+
+static struct kobj_attribute version_attribute =
+	__ATTR(version, 0664, version_show, NULL);
+
+static ssize_t bootreason_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%i\n", bmc_bootreason[0] |
+				   (bmc_bootreason[1] << 8));
+}
+
+static struct kobj_attribute bootreason_attribute =
+	__ATTR(bootreason, 0664, bootreason_show, NULL);
+
+static ssize_t scratch_show(struct kobject *kobj,
+			    struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%i\n", bmc_scratch[0] |
+				   (bmc_scratch[1] <<  8) |
+				   (bmc_scratch[2] << 16) |
+				   (bmc_scratch[3] << 24));
+}
+
+static struct kobj_attribute scratch_attribute =
+	__ATTR(scratch, 0664, scratch_show, NULL);
+
+static struct attribute *bmc_attrs[] = {
+	&version_attribute.attr,
+	&bootreason_attribute.attr,
+	&scratch_attribute.attr,
+	NULL,
+};
+
+ATTRIBUTE_GROUPS(bmc);
+
+static struct i2c_driver mitx2_bmc_i2c_driver = {
+	.driver	  = {
+		.name = "mitx2-bmc",
+		.of_match_table = of_match_ptr(mitx2_bmc_of_match),
+		.groups = bmc_groups,
+	},
+	.probe	  = mitx2_bmc_i2c_probe,
+	.remove	  = mitx2_bmc_i2c_remove,
+	.id_table = mitx2_bmc_i2c_id,
+};
+module_i2c_driver(mitx2_bmc_i2c_driver);
+
+MODULE_AUTHOR("Konstantin Kirik");
+MODULE_DESCRIPTION("mITX2 BMC driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("serial:bmc");
diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c
index 6793fb8fe976b..496fb9dc6dad1 100644
--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
+++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
@@ -19,6 +19,8 @@
 #define BOUNDARY_OK(addr, len) \
 	((addr | (SZ_128M - 1)) == ((addr + len - 1) | (SZ_128M - 1)))
 
+#define MSHC_INPUT_DIVIDER 2  /* mshc_tx_x2 to mshc_tx divider */
+
 struct dwcmshc_priv {
 	struct clk	*bus_clk;
 };
@@ -46,11 +48,50 @@ static void dwcmshc_adma_write_desc(struct sdhci_host *host, void **desc,
 	sdhci_adma_write_desc(host, desc, addr, len, cmd);
 }
 
+static unsigned int dwcmshc_get_max_clock (struct sdhci_host *host)
+{
+	/*
+	// todo: fix min\max clk rate
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	return clk_round_rate(pltfm_host->clk, ULONG_MAX);
+	*/
+	struct mmc_host *mmc = host->mmc;
+	struct device *dev = mmc_dev(mmc);
+	unsigned int ret;
+	if (device_property_read_u32(dev, "max-clock", &ret) < 0) {
+		ret = 1*1000*1000;
+	}
+	return ret;
+}
+
+static unsigned int dwcmshc_get_min_clock (struct sdhci_host *host)
+{
+	/*
+	// todo: fix min\max clk rate
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	return clk_round_rate(pltfm_host->clk, ULONG_MIN);
+	*/
+	return 150*1000;   /* tx2 = (pll=1200MHz)/(div=250*16) = 300/2 = 150KHz */
+}
+
+static void dwcmshc_set_clock(struct sdhci_host *host, unsigned int clock)
+{
+	struct sdhci_pltfm_host *pltfm_host;
+	if (clock == 0) {
+		return;
+	}
+	pltfm_host = sdhci_priv(host);
+	clk_set_rate(pltfm_host->clk, clock * MSHC_INPUT_DIVIDER);
+	host->mmc->actual_clock = clk_get_rate(pltfm_host->clk) / MSHC_INPUT_DIVIDER;
+	sdhci_enable_clk(host, 0);
+}
+
 static const struct sdhci_ops sdhci_dwcmshc_ops = {
-	.set_clock		= sdhci_set_clock,
+	.set_clock		= dwcmshc_set_clock,
 	.set_bus_width		= sdhci_set_bus_width,
 	.set_uhs_signaling	= sdhci_set_uhs_signaling,
-	.get_max_clock		= sdhci_pltfm_clk_get_max_clock,
+	.get_max_clock		= dwcmshc_get_max_clock,
+	.get_min_clock		= dwcmshc_get_min_clock,
 	.reset			= sdhci_reset,
 	.adma_write_desc	= dwcmshc_adma_write_desc,
 };
@@ -58,7 +99,9 @@ static const struct sdhci_ops sdhci_dwcmshc_ops = {
 static const struct sdhci_pltfm_data sdhci_dwcmshc_pdata = {
 	.ops = &sdhci_dwcmshc_ops,
 	.quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
-	.quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
+	.quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+		SDHCI_QUIRK2_BROKEN_64_BIT_DMA_MASK |
+		SDHCI_QUIRK2_NO_1_8_V,
 };
 
 static int dwcmshc_probe(struct platform_device *pdev)
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 115feb9aa2364..069b440c19c0b 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1031,6 +1031,24 @@ static void sdhci_set_timeout(struct sdhci_host *host, struct mmc_command *cmd)
 		__sdhci_set_timeout(host, cmd);
 }
 
+/* Set the DMA boundary value and block size */
+static inline void sdhci_set_block_info(struct sdhci_host *host, struct mmc_data *data)
+{
+	/* Set the DMA boundary value and block size */
+	sdhci_writew(host, SDHCI_MAKE_BLKSZ(host->sdma_boundary, data->blksz), SDHCI_BLOCK_SIZE);
+	/*
+	 * For Version 4.10 onwards, if v4 mode is enabled, 32-bit Block Count
+	 * can be supported, in that case 16-bit block count register must be 0.
+	 */
+	if (host->version >= SDHCI_SPEC_410 && host->v4_mode && (host->quirks2 & SDHCI_QUIRK2_USE_32BIT_BLK_CNT)) {
+		if (sdhci_readw(host, SDHCI_BLOCK_COUNT))
+			sdhci_writew(host, 0, SDHCI_BLOCK_COUNT);
+		sdhci_writew(host, data->blocks, SDHCI_32BIT_BLK_CNT);
+	} else {
+		sdhci_writew(host, data->blocks, SDHCI_BLOCK_COUNT);
+	}
+}
+
 static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
 {
 	struct mmc_data *data = cmd->data;
@@ -1139,22 +1157,7 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
 
 	sdhci_set_transfer_irqs(host);
 
-	/* Set the DMA boundary value and block size */
-	sdhci_writew(host, SDHCI_MAKE_BLKSZ(host->sdma_boundary, data->blksz),
-		     SDHCI_BLOCK_SIZE);
-
-	/*
-	 * For Version 4.10 onwards, if v4 mode is enabled, 32-bit Block Count
-	 * can be supported, in that case 16-bit block count register must be 0.
-	 */
-	if (host->version >= SDHCI_SPEC_410 && host->v4_mode &&
-	    (host->quirks2 & SDHCI_QUIRK2_USE_32BIT_BLK_CNT)) {
-		if (sdhci_readw(host, SDHCI_BLOCK_COUNT))
-			sdhci_writew(host, 0, SDHCI_BLOCK_COUNT);
-		sdhci_writew(host, data->blocks, SDHCI_32BIT_BLK_CNT);
-	} else {
-		sdhci_writew(host, data->blocks, SDHCI_BLOCK_COUNT);
-	}
+	sdhci_set_block_info(host, data);
 }
 
 static inline bool sdhci_auto_cmd12(struct sdhci_host *host,
@@ -1216,7 +1219,7 @@ static void sdhci_set_transfer_mode(struct sdhci_host *host,
 			if (cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
 				sdhci_writew(host, 0x0, SDHCI_TRANSFER_MODE);
 		} else {
-		/* clear Auto CMD settings for no data CMDs */
+			/* clear Auto CMD settings for no data CMDs */
 			mode = sdhci_readw(host, SDHCI_TRANSFER_MODE);
 			sdhci_writew(host, mode & ~(SDHCI_TRNS_AUTO_CMD12 |
 				SDHCI_TRNS_AUTO_CMD23), SDHCI_TRANSFER_MODE);
@@ -2898,9 +2901,12 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
 		 */
 		if (host->pending_reset)
 			return;
+		/*
+		 * TODO: fix false positive interrupts on MBM board.
 		pr_err("%s: Got command interrupt 0x%08x even though no command operation was in progress.\n",
 		       mmc_hostname(host->mmc), (unsigned)intmask);
 		sdhci_dumpregs(host);
+		*/
 		return;
 	}
 
@@ -3644,7 +3650,8 @@ static int sdhci_set_dma_mask(struct sdhci_host *host)
 		host->flags &= ~SDHCI_USE_64_BIT_DMA;
 
 	/* Try 64-bit mask if hardware is capable  of it */
-	if (host->flags & SDHCI_USE_64_BIT_DMA) {
+	if (!(host->quirks2 & SDHCI_QUIRK2_BROKEN_64_BIT_DMA_MASK) &&
+	     (host->flags & SDHCI_USE_64_BIT_DMA)) {
 		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
 		if (ret) {
 			pr_warn("%s: Failed to set 64-bit DMA mask.\n",
@@ -3888,15 +3895,13 @@ int sdhci_setup_host(struct sdhci_host *host)
 		dma_addr_t dma;
 		void *buf;
 
-		if (host->flags & SDHCI_USE_64_BIT_DMA) {
-			host->adma_table_sz = host->adma_table_cnt *
-					      SDHCI_ADMA2_64_DESC_SZ(host);
-			host->desc_sz = SDHCI_ADMA2_64_DESC_SZ(host);
-		} else {
-			host->adma_table_sz = host->adma_table_cnt *
-					      SDHCI_ADMA2_32_DESC_SZ;
-			host->desc_sz = SDHCI_ADMA2_32_DESC_SZ;
-		}
+		if (!(host->flags & SDHCI_USE_64_BIT_DMA))
+			host->alloc_desc_sz = SDHCI_ADMA2_32_DESC_SZ;
+		else if (!host->alloc_desc_sz)
+			host->alloc_desc_sz = SDHCI_ADMA2_64_DESC_SZ(host);
+
+		host->desc_sz = host->alloc_desc_sz;
+		host->adma_table_sz = host->adma_table_cnt * host->desc_sz;
 
 		host->align_buffer_sz = SDHCI_MAX_SEGS * SDHCI_ADMA2_ALIGN;
 		/*
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 00df46a75be57..7132156159cf9 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -223,6 +223,7 @@
 #define  SDHCI_SUPPORT_SDR50	0x00000001
 #define  SDHCI_SUPPORT_SDR104	0x00000002
 #define  SDHCI_SUPPORT_DDR50	0x00000004
+#define  SDHCI_SUPPORT_UHS2	0x00000008
 #define  SDHCI_DRIVER_TYPE_A	0x00000010
 #define  SDHCI_DRIVER_TYPE_C	0x00000020
 #define  SDHCI_DRIVER_TYPE_D	0x00000040
@@ -234,6 +235,7 @@
 #define  SDHCI_CLOCK_MUL_MASK	0x00FF0000
 #define  SDHCI_CLOCK_MUL_SHIFT	16
 #define  SDHCI_CAN_DO_ADMA3	0x08000000
+#define  SDHCI_SUPPORT_VDD2_18	0x10000000
 #define  SDHCI_SUPPORT_HS400	0x80000000 /* Non-standard */
 
 #define SDHCI_CAPABILITIES_1	0x44
@@ -287,6 +289,12 @@
 #define   SDHCI_SPEC_410	4
 #define   SDHCI_SPEC_420	5
 
+
+#define SDHCI_EMMC_CONTROL	0x52C
+#define  SDHCI_EMMC_TYPE_MMC	(1 << 0)  /* 0-sd,     1-mmc     */
+#define  SDHCI_EMMC_CRC_DISABLE	(1 << 1)  /* 0-enable, 1-disable */
+#define  SDHCI_EMMC_DONT_RESET	(1 << 2)  /* 0-reset,  1-dont    */
+
 /*
  * End of controller registers.
  */
@@ -484,9 +492,11 @@ struct sdhci_host {
  * block count.
  */
 #define SDHCI_QUIRK2_USE_32BIT_BLK_CNT			(1<<18)
+#define SDHCI_QUIRK2_BROKEN_64_BIT_DMA_MASK		(1<<19)
 
 	int irq;		/* Device IRQ */
 	void __iomem *ioaddr;	/* Mapped address */
+	phys_addr_t mapbase;	/* physical address base */
 	char *bounce_buffer;	/* For packing SDMA reads/writes */
 	dma_addr_t bounce_addr;
 	unsigned int bounce_buffer_size;
@@ -537,6 +547,7 @@ struct sdhci_host {
 	bool pending_reset;	/* Cmd/data reset is pending */
 	bool irq_wake_enabled;	/* IRQ wakeup is enabled */
 	bool v4_mode;		/* Host Version 4 Enable */
+	bool use_external_dma;	/* Host selects to use external DMA */
 
 	struct mmc_request *mrqs_done[SDHCI_MAX_MRQS];	/* Requests done */
 	struct mmc_command *cmd;	/* Current command */
@@ -560,6 +571,7 @@ struct sdhci_host {
 	dma_addr_t align_addr;	/* Mapped bounce buffer */
 
 	unsigned int desc_sz;	/* ADMA descriptor size */
+	unsigned int alloc_desc_sz;	/* ADMA descr. max size host supports */
 
 	struct workqueue_struct *complete_wq;	/* Request completion wq */
 	struct work_struct	complete_work;	/* Request completion work */
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index dc7f5db544087..a705af72e4f79 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -2487,6 +2487,8 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "w25q256", INFO(0xef4019, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
 	{ "w25q256jvm", INFO(0xef7019, 0, 64 * 1024, 512,
 			     SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
+	{ "w25q512jv", INFO(0xef7020, 0, 64 * 1024, 1024,
+				 SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) },
 	{ "w25m512jv", INFO(0xef7119, 0, 64 * 1024, 1024,
 			SECT_4K | SPI_NOR_QUAD_READ | SPI_NOR_DUAL_READ) },
 
diff --git a/drivers/mux/Kconfig b/drivers/mux/Kconfig
index e5c571fd232cc..4ecb67d4fcff7 100644
--- a/drivers/mux/Kconfig
+++ b/drivers/mux/Kconfig
@@ -58,4 +58,13 @@ config MUX_MMIO
 	  To compile the driver as a module, choose M here: the module will
 	  be called mux-mmio.
 
+config MUX_BAIKAL
+	tristate "Baikal-S SoC low speed peripheral IO Multiplexer"
+	help
+	  The driver allows to control three switches of the multiplexer
+	  independently:
+	  - 8 GPIOs or UART/SMBus/SMBus;
+	  - 8 GPIOs or SPI;
+	  - 16 GPIOs or eSPI.
+
 endmenu
diff --git a/drivers/mux/Makefile b/drivers/mux/Makefile
index 6e9fa47daf566..af1454c869def 100644
--- a/drivers/mux/Makefile
+++ b/drivers/mux/Makefile
@@ -8,9 +8,11 @@ mux-adg792a-objs		:= adg792a.o
 mux-adgs1408-objs		:= adgs1408.o
 mux-gpio-objs			:= gpio.o
 mux-mmio-objs			:= mmio.o
+mux-baikal-objs			:= baikal.o
 
 obj-$(CONFIG_MULTIPLEXER)	+= mux-core.o
 obj-$(CONFIG_MUX_ADG792A)	+= mux-adg792a.o
 obj-$(CONFIG_MUX_ADGS1408)	+= mux-adgs1408.o
 obj-$(CONFIG_MUX_GPIO)		+= mux-gpio.o
 obj-$(CONFIG_MUX_MMIO)		+= mux-mmio.o
+obj-$(CONFIG_MUX_BAIKAL)	+= mux-baikal.o
diff --git a/drivers/mux/baikal.c b/drivers/mux/baikal.c
new file mode 100644
index 0000000000000..1e3ce1f73e624
--- /dev/null
+++ b/drivers/mux/baikal.c
@@ -0,0 +1,704 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Multiplexer driver for Baikal-S SoC low speed peripheral devices
+ *
+ * Copyright (C) 2022-2023 Baikal Electronics, JSC
+ * Author: Aleksandr Efimov <alexander.efimov@baikalelectronics.ru>
+ */
+
+#include <linux/acpi.h>
+#include <linux/arm-smccc.h>
+#include <linux/of_platform.h>
+#include <linux/module.h>
+#include <linux/mux/consumer.h>
+#include <linux/mux/driver.h>
+#include <linux/platform_device.h>
+
+#define BAIKAL_SMC_LSP_MUX		0x82000600
+#define BAIKAL_LSP_MUX_COUNT		3
+#define BAIKAL_LSP_MUX_STATE_COUNT	2
+#define BAIKAL_LSP_MUX_AS_IS		2
+
+static u8 uart_smbus = BAIKAL_LSP_MUX_AS_IS;
+module_param(uart_smbus, byte, 0);
+MODULE_PARM_DESC(uart_smbus, "UART/SMBus/SMBus. 0=Disable, 1=Enable");
+
+static u8 qspi = BAIKAL_LSP_MUX_AS_IS;
+module_param(qspi, byte, 0);
+MODULE_PARM_DESC(qspi, "QSPI. 0=Disable, 1=Enable");
+
+static u8 espi = BAIKAL_LSP_MUX_AS_IS;
+module_param(espi, byte, 0);
+MODULE_PARM_DESC(espi, "eSPI. 0=Disable, 1=Enable");
+
+static const u8 * const baikal_lsp_mux_init_value[3] = {
+	&uart_smbus,
+	&qspi,
+	&espi
+};
+
+struct baikal_lsp_mux_priv {
+	struct fwnode_handle **fwnode[BAIKAL_LSP_MUX_STATE_COUNT];
+	struct platform_device **plat_dev[BAIKAL_LSP_MUX_STATE_COUNT];
+	u8 count[BAIKAL_LSP_MUX_STATE_COUNT];
+	bool acpi_node_enabled[BAIKAL_LSP_MUX_STATE_COUNT];
+	struct mutex lock;
+	struct mux_control *mux;
+	struct platform_device *pdev;
+};
+
+static char *baikal_lsp_mux_property(unsigned int state)
+{
+	if (acpi_disabled)
+		return state ? "mux-state1" : "mux-state0";
+	else
+		return state ? "DEV1" : "DEV0";
+}
+
+static struct platform_device *
+baikal_lsp_mux_channel_create_device(struct fwnode_handle *fwnode,
+				     struct device *parent)
+{
+	if (acpi_disabled) {
+		return of_platform_device_create(to_of_node(fwnode), NULL,
+						 parent);
+	} else {
+		return acpi_create_platform_device(to_acpi_device_node(fwnode),
+						   NULL);
+	}
+}
+
+static void baikal_lsp_mux_channel_remove_device(struct platform_device *pdev)
+{
+	if (acpi_disabled) {
+		of_platform_device_destroy(&pdev->dev, NULL);
+	} else {
+		platform_device_unregister(pdev);
+	}
+}
+
+#ifdef CONFIG_ACPI
+static int baikal_lsp_mux_channel_enable_nodes_acpi(struct baikal_lsp_mux_priv *priv,
+						    unsigned int state)
+{
+	union acpi_object val = {
+		.type = ACPI_TYPE_INTEGER,
+		.integer.type = ACPI_TYPE_INTEGER,
+		.integer.value = state
+	};
+	struct acpi_object_list args = {
+		.count = 1,
+		.pointer = &val
+	};
+	struct acpi_device *adev =
+		to_acpi_device_node(priv->pdev->dev.fwnode);
+	struct device *ref_dev;
+	struct platform_device **pdev;
+	struct fwnode_handle **fwnode;
+	u64 is_enabled;
+	acpi_status status;
+	int i;
+
+	status = acpi_evaluate_integer(adev->handle,
+				       state ? "STA1" : "STA0",
+				       NULL,
+				       &is_enabled);
+	if (ACPI_FAILURE(status)) {
+		dev_err(&adev->dev, "failed to get state for %s\n",
+			baikal_lsp_mux_property(i));
+		return -ENODEV;
+	}
+
+	if (is_enabled) {
+		priv->acpi_node_enabled[state] = true;
+		return 0;
+	}
+
+	status = acpi_evaluate_object(adev->handle, "INIT", &args,
+				      NULL);
+	if (ACPI_FAILURE(status)) {
+		dev_err(&adev->dev, "failed to enable %s devices\n",
+			baikal_lsp_mux_property(i));
+		return -ENODEV;
+	}
+
+	priv->acpi_node_enabled[state] = true;
+
+	for (i = 0; i < priv->count[state]; ++i) {
+		fwnode = &priv->fwnode[state][i];
+		pdev = &priv->plat_dev[state][i];
+
+		if (!*fwnode)
+			continue;
+
+		adev = to_acpi_device_node(*fwnode);
+		acpi_scan_lock_acquire();
+		if (adev)
+			acpi_bus_scan(adev->handle);
+		acpi_scan_lock_release();
+
+		ref_dev = bus_find_device_by_fwnode(&platform_bus_type,
+						    *fwnode);
+		if (ref_dev)
+			*pdev = to_platform_device(ref_dev);
+	}
+
+	/* Device nodes were created by this function */
+	return 1;
+}
+#else
+static inline int baikal_lsp_mux_channel_enable_nodes_acpi(struct baikal_lsp_mux_priv *priv,
+							   unsigned int state)
+{
+	return 0;
+}
+#endif
+
+static void baikal_lsp_mux_channel_remove_devices(struct baikal_lsp_mux_priv *priv,
+						  unsigned int state)
+{
+	struct platform_device **pdev;
+	unsigned int i;
+
+	if (state >= BAIKAL_LSP_MUX_STATE_COUNT)
+		return;
+
+	for (i = 0; i < priv->count[state]; ++i) {
+		pdev = &priv->plat_dev[state][i];
+
+		if (*pdev) {
+			baikal_lsp_mux_channel_remove_device(*pdev);
+			*pdev = NULL;
+		}
+	}
+}
+
+static int baikal_lsp_mux_channel_switch_devices(struct baikal_lsp_mux_priv *priv,
+						 unsigned int state)
+{
+	struct device *parent = priv->mux->chip->dev.parent;
+	struct platform_device **pdev;
+	struct fwnode_handle **fwnode;
+	int i, ret = 0;
+
+	state = !!state;
+
+	mutex_lock(&priv->lock);
+
+	if (priv->mux->cached_state == state)
+		goto mux_unlock;
+
+	if (priv->mux->cached_state == MUX_IDLE_AS_IS) {
+		for (i = 0; i < BAIKAL_LSP_MUX_STATE_COUNT; ++i)
+			baikal_lsp_mux_channel_remove_devices(priv, i);
+	} else {
+		baikal_lsp_mux_channel_remove_devices(priv, !state);
+	}
+
+	ret = mux_control_select(priv->mux, state);
+	if (ret)
+		goto mux_unlock;
+
+	ret = mux_control_deselect(priv->mux);
+	if (ret)
+		goto mux_unlock;
+
+	if (priv->mux->cached_state == MUX_IDLE_AS_IS) {
+		baikal_lsp_mux_channel_remove_devices(priv, state);
+		ret = -EPROTO;
+		goto mux_unlock;
+	}
+
+	if (!acpi_disabled && !priv->acpi_node_enabled[state]) {
+		ret = baikal_lsp_mux_channel_enable_nodes_acpi(priv, state);
+
+		/* Check if device nodes were created */
+		if (ret == 1) {
+			ret = 0;
+			goto mux_unlock;
+		}
+	}
+
+	if (!ret) {
+		for (i = 0; i < priv->count[state]; ++i) {
+			fwnode = &priv->fwnode[state][i];
+			pdev = &priv->plat_dev[state][i];
+
+			if (!*fwnode)
+				continue;
+
+			if (!*pdev) {
+				*pdev = baikal_lsp_mux_channel_create_device(*fwnode,
+									     parent);
+				if (IS_ERR_OR_NULL(*pdev)) {
+					*pdev = NULL;
+				}
+			} else {
+				baikal_lsp_mux_channel_remove_device(*pdev);
+			}
+		}
+	}
+
+mux_unlock:
+	mutex_unlock(&priv->lock);
+	return ret;
+}
+
+static ssize_t baikal_lsp_mux_channel_store(struct device *dev,
+					    struct device_attribute *attr,
+					    const char *buf, size_t count)
+{
+	struct baikal_lsp_mux_priv *priv = dev_get_drvdata(dev);
+	unsigned int n = 0;
+	int ret;
+
+	ret = kstrtouint(buf, 10, &n);
+	if (ret)
+		return ret;
+
+	ret = baikal_lsp_mux_channel_switch_devices(priv, n);
+
+	ret = ret ? ret : count;
+
+	return ret;
+}
+
+static ssize_t baikal_lsp_mux_channel_show(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct baikal_lsp_mux_priv *priv = dev_get_drvdata(dev);
+	return sprintf(buf, "%i\n", priv->mux->cached_state);
+}
+
+static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, baikal_lsp_mux_channel_show,
+		   baikal_lsp_mux_channel_store);
+
+static struct attribute *baikal_lsp_mux_channel_attrs[] = {
+	&dev_attr_enabled.attr,
+	NULL
+};
+
+static const struct attribute_group baikal_lsp_mux_channel_attr_group = {
+	.attrs = baikal_lsp_mux_channel_attrs
+};
+
+static struct baikal_lsp_mux_priv *
+baikal_lsp_mux_channel_get_of_data(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct device_node *mux_dev;
+	struct baikal_lsp_mux_priv *priv;
+	int count[BAIKAL_LSP_MUX_STATE_COUNT];
+	int i, j;
+
+	if (!np)
+		return ERR_PTR(-ENODEV);
+
+	for (i = 0; i < BAIKAL_LSP_MUX_STATE_COUNT; ++i) {
+		count[i] = of_count_phandle_with_args(np,
+					      baikal_lsp_mux_property(i), NULL);
+		if (count[i] < 1) {
+			dev_err(dev, "invalid number of phandles in '%s' property\n",
+				baikal_lsp_mux_property(i));
+			return ERR_PTR(-EINVAL);
+		}
+	}
+
+	priv = devm_kzalloc(dev, sizeof(*priv) +
+				 2 * (count[0] + count[1]) * sizeof(void *),
+			    GFP_KERNEL);
+	if (!priv)
+		return ERR_PTR(-ENOMEM);
+
+	priv->count[0] = count[0];
+	priv->count[1] = count[1];
+	priv->fwnode[0] = (struct fwnode_handle **)(priv + 1);
+	priv->fwnode[1] = priv->fwnode[0] + priv->count[0];
+	priv->plat_dev[0] = (struct platform_device **)(priv->fwnode[1] +
+			    priv->count[1]);
+	priv->plat_dev[1] = priv->plat_dev[0] + priv->count[0];
+
+	priv->mux = mux_control_get(dev, NULL);
+	if (IS_ERR(priv->mux))
+		return ERR_CAST(priv->mux);
+
+	for (i = 0; i < BAIKAL_LSP_MUX_STATE_COUNT; ++i) {
+		for (j = 0; j < priv->count[i]; ++j) {
+			mux_dev = of_parse_phandle(np,
+						   baikal_lsp_mux_property(i),
+						   j);
+			if (!mux_dev) {
+				dev_err(dev, "invalid phandle%i in '%s' property\n",
+					j, baikal_lsp_mux_property(i));
+				return ERR_PTR(-EINVAL);
+			}
+
+			priv->fwnode[i][j] = of_fwnode_handle(mux_dev);
+		}
+	}
+
+	return priv;
+}
+
+#ifdef CONFIG_ACPI
+static struct mux_control *
+baikal_lsp_mux_channel_get_mux_acpi(struct acpi_device *adev)
+{
+	struct device *dev = &adev->dev, *mux_chip_dev;
+	struct acpi_device *mux;
+	struct mux_chip *mux_chip;
+	union acpi_object *package = NULL;
+	union acpi_object *element = NULL;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	acpi_status status = AE_OK;
+	unsigned int id;
+	int ret;
+
+	status = acpi_evaluate_object_typed(adev->handle, "MUX", NULL,
+					    &buffer, ACPI_TYPE_PACKAGE);
+	if (ACPI_FAILURE(status)) {
+		dev_err(dev, "failed to get MUX data\n");
+		return ERR_PTR(-ENODEV);
+	}
+
+	package = buffer.pointer;
+	if (package->package.count != 2) {
+		dev_err(dev, "invalid MUX data\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	element = &(package->package.elements[0]);
+	if (element->type != ACPI_TYPE_LOCAL_REFERENCE ||
+	    !element->reference.handle) {
+		dev_err(dev, "invalid MUX reference\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = acpi_bus_get_device(element->reference.handle, &mux);
+	if (ret) {
+		dev_err(dev, "failed to process MUX reference\n");
+		ret = -ENODEV;
+		goto err;
+	}
+
+	element = &(package->package.elements[1]);
+	if (element->type != ACPI_TYPE_INTEGER) {
+		dev_err(dev, "failed to get MUX index\n");
+		ret = -EINVAL;
+		goto err;
+	}
+	id = element->integer.value;
+
+	mux_chip_dev = bus_find_device_by_fwnode(&platform_bus_type,
+						 acpi_fwnode_handle(mux));
+	if (!mux_chip_dev) {
+		dev_err(dev, "failed to get mux chip device\n");
+		ret = -ENODEV;
+		goto err;
+	}
+
+	mux_chip = dev_get_drvdata(mux_chip_dev);
+	if (!mux_chip) {
+		dev_err(dev, "failed to get mux chip\n");
+		ret = -ENODEV;
+		goto err;
+	}
+
+	if (id >= mux_chip->controllers) {
+		dev_err(dev, "bad mux controller %u\n", id);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = 0;
+
+err:
+	acpi_os_free(buffer.pointer);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return &mux_chip->mux[id];
+}
+
+static struct baikal_lsp_mux_priv *
+baikal_lsp_mux_channel_get_acpi_data(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct acpi_device *adev = ACPI_COMPANION(dev);
+	struct acpi_device *mux_dev;
+	struct baikal_lsp_mux_priv *priv;
+	int count[BAIKAL_LSP_MUX_STATE_COUNT];
+	struct acpi_buffer buffer[BAIKAL_LSP_MUX_STATE_COUNT];
+	union acpi_object *package = NULL;
+	union acpi_object *element = NULL;
+	acpi_status status = AE_OK;
+	int i, j, ret;
+
+	if (!adev)
+		return ERR_PTR(-ENODEV);
+
+	for (i = 0; i < BAIKAL_LSP_MUX_STATE_COUNT; ++i) {
+		buffer[i].length = ACPI_ALLOCATE_BUFFER;
+		buffer[i].pointer = NULL;
+		status = acpi_evaluate_object_typed(adev->handle,
+						    baikal_lsp_mux_property(i),
+						    NULL,
+						    &buffer[i],
+						    ACPI_TYPE_PACKAGE);
+		if (ACPI_FAILURE(status)) {
+			dev_err(dev, "failed to get %s data\n",
+				baikal_lsp_mux_property(i));
+			return ERR_PTR(-ENODEV);
+		}
+
+		package = buffer[i].pointer;
+		count[i] = package->package.count;
+		if (count[i] < 1) {
+			dev_err(dev, "invalid number of references in %s\n",
+				baikal_lsp_mux_property(i));
+			ret = -EINVAL;
+			goto err;
+		}
+	}
+
+	priv = devm_kzalloc(dev, sizeof(*priv) +
+				 2 * (count[0] + count[1]) * sizeof(void *),
+			    GFP_KERNEL);
+	if (!priv) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	priv->count[0] = count[0];
+	priv->count[1] = count[1];
+	priv->fwnode[0] = (struct fwnode_handle **)(priv + 1);
+	priv->fwnode[1] = priv->fwnode[0] + priv->count[0];
+	priv->plat_dev[0] = (struct platform_device **)(priv->fwnode[1] +
+			    priv->count[1]);
+	priv->plat_dev[1] = priv->plat_dev[0] + priv->count[0];
+
+	priv->mux = baikal_lsp_mux_channel_get_mux_acpi(adev);
+	if (IS_ERR(priv->mux)) {
+		ret = PTR_ERR(priv->mux);
+		goto err;
+	}
+
+	for (i = 0; i < BAIKAL_LSP_MUX_STATE_COUNT; ++i) {
+		package = buffer[i].pointer;
+		for (j = 0; j < priv->count[i]; ++j) {
+			element = &(package->package.elements[j]);
+
+			if (element->type != ACPI_TYPE_LOCAL_REFERENCE ||
+			    !element->reference.handle) {
+				dev_err(dev, "invalid reference%i in %s\n", j,
+					baikal_lsp_mux_property(i));
+				ret = -EINVAL;
+				goto err;
+			}
+
+			ret = acpi_bus_get_device(element->reference.handle,
+						  &mux_dev);
+			if (ret) {
+				dev_err(dev, "failed to process reference%i in %s\n",
+					j, baikal_lsp_mux_property(i));
+				ret = -EINVAL;
+				goto err;
+			}
+
+			priv->fwnode[i][j] = acpi_fwnode_handle(mux_dev);
+		}
+	}
+
+	ret = 0;
+
+err:
+	for (i = 0; i < BAIKAL_LSP_MUX_STATE_COUNT; ++i)
+		acpi_os_free(buffer[i].pointer);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return priv;
+}
+#else /* CONFIG_ACPI */
+static inline struct baikal_lsp_mux_priv *
+baikal_lsp_mux_channel_get_acpi_data(struct platform_device *pdev)
+{
+	return ERR_PTR(-ENODEV);
+}
+#endif
+
+static int baikal_lsp_mux_channel_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct baikal_lsp_mux_priv *priv;
+	unsigned int id;
+	int ret;
+
+	if (acpi_disabled)
+		priv = baikal_lsp_mux_channel_get_of_data(pdev);
+	else
+		priv = baikal_lsp_mux_channel_get_acpi_data(pdev);
+
+	if (IS_ERR(priv)) {
+		dev_err(dev, "failed to get mux data\n");
+		return PTR_ERR(priv);
+	}
+
+	priv->pdev = pdev;
+	mutex_init(&priv->lock);
+	platform_set_drvdata(pdev, priv);
+
+	id = mux_control_get_index(priv->mux);
+	ret = baikal_lsp_mux_channel_switch_devices(priv,
+						*baikal_lsp_mux_init_value[id]);
+	if (ret) {
+		dev_err(dev, "failed to initialize mux devices: %i\n", ret);
+		return ret;
+	}
+
+	ret = sysfs_create_group(&dev->kobj, &baikal_lsp_mux_channel_attr_group);
+	if (ret) {
+		dev_err(dev, "failed to create sysfs group: %i\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct of_device_id baikal_lsp_mux_channel_of_match[] = {
+	{ .compatible = "baikal,bs1000-lsp-mux-channel" },
+	{}
+};
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id baikal_lsp_mux_channel_acpi_match[] = {
+	{ "BKLE0003" },
+	{}
+};
+
+MODULE_DEVICE_TABLE(acpi, baikal_lsp_mux_channel_acpi_match);
+#endif
+
+static struct platform_driver baikal_lsp_mux_channel_driver = {
+	.driver = {
+		.name = "baikal-lsp-mux-channel",
+		.of_match_table = baikal_lsp_mux_channel_of_match,
+		.acpi_match_table = ACPI_PTR(baikal_lsp_mux_channel_acpi_match),
+		.suppress_bind_attrs = true
+	},
+	.probe = baikal_lsp_mux_channel_probe
+};
+
+static DEFINE_MUTEX(baikal_lsp_mux_lock);
+
+static int baikal_lsp_mux_set(struct mux_control *mux, int state)
+{
+	struct arm_smccc_res res;
+
+	mutex_lock(&baikal_lsp_mux_lock);
+	arm_smccc_smc(BAIKAL_SMC_LSP_MUX,
+		      state << mux_control_get_index(mux) & 0x7,
+		      0, 0, 0, 0, 0, 0, &res);
+	mutex_unlock(&baikal_lsp_mux_lock);
+
+	return res.a0;
+}
+
+static const struct mux_control_ops baikal_lsp_mux_ops = {
+	.set = baikal_lsp_mux_set
+};
+
+static int baikal_lsp_mux_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct mux_chip *mux_chip;
+	int i, ret;
+
+	mux_chip = devm_mux_chip_alloc(dev, BAIKAL_LSP_MUX_COUNT, 0);
+	if (IS_ERR(mux_chip)) {
+		dev_err(dev, "failed to allocate mux chip\n");
+		return PTR_ERR(mux_chip);
+	}
+
+	mux_chip->ops = &baikal_lsp_mux_ops;
+
+	if (uart_smbus == BAIKAL_LSP_MUX_AS_IS) {
+		uart_smbus = !!device_property_present(dev, "uart-smbus-enabled");
+	}
+	baikal_lsp_mux_set(&mux_chip->mux[0], uart_smbus);
+
+	if (qspi == BAIKAL_LSP_MUX_AS_IS) {
+		qspi = !!device_property_present(dev, "qspi-enabled");
+	}
+	baikal_lsp_mux_set(&mux_chip->mux[1], qspi);
+
+	if (espi == BAIKAL_LSP_MUX_AS_IS) {
+		espi = !!device_property_present(dev, "espi-enabled");
+	}
+	baikal_lsp_mux_set(&mux_chip->mux[2], espi);
+
+	for (i = 0; i < mux_chip->controllers; ++i)
+		mux_chip->mux[i].states = BAIKAL_LSP_MUX_STATE_COUNT;
+
+	ret = devm_mux_chip_register(dev, mux_chip);
+	if (ret) {
+		dev_err(dev, "failed to register mux chip\n");
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, mux_chip);
+
+	return 0;
+}
+
+static const struct of_device_id baikal_lsp_mux_of_match[] = {
+	{ .compatible = "baikal,bs1000-lsp-mux" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, baikal_lsp_mux_of_match);
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id baikal_lsp_mux_acpi_match[] = {
+	{ "BKLE0002" },
+	{}
+};
+
+MODULE_DEVICE_TABLE(acpi, baikal_lsp_mux_acpi_match);
+#endif
+
+static struct platform_driver baikal_lsp_mux_driver = {
+	.driver = {
+		.name = "baikal-lsp-mux",
+		.of_match_table = baikal_lsp_mux_of_match,
+		.acpi_match_table = ACPI_PTR(baikal_lsp_mux_acpi_match),
+		.suppress_bind_attrs = true
+	},
+	.probe = baikal_lsp_mux_probe
+};
+
+static struct platform_driver * const baikal_lsp_mux_drivers[] = {
+	&baikal_lsp_mux_driver,
+	&baikal_lsp_mux_channel_driver
+};
+
+static int __init baikal_lsp_mux_init(void)
+{
+	return platform_register_drivers(baikal_lsp_mux_drivers,
+					 ARRAY_SIZE(baikal_lsp_mux_drivers));
+}
+module_init(baikal_lsp_mux_init);
+
+static void __exit baikal_lsp_mux_exit(void)
+{
+	platform_unregister_drivers(baikal_lsp_mux_drivers,
+				    ARRAY_SIZE(baikal_lsp_mux_drivers));
+}
+module_exit(baikal_lsp_mux_exit);
+
+MODULE_AUTHOR("Aleksandr Efimov <alexander.efimov@baikalelectronics.ru>");
+MODULE_DESCRIPTION("Baikal-S SoC low speed peripheral mux driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig
index 9f965cdfff5c9..a9ff57a82a92d 100644
--- a/drivers/net/ethernet/amd/Kconfig
+++ b/drivers/net/ethernet/amd/Kconfig
@@ -179,6 +179,7 @@ config AMD_XGBE
 	select BITREVERSE
 	select CRC32
 	select PHYLIB
+	select PHYLINK
 	select AMD_XGBE_HAVE_ECC if X86
 	imply PTP_1588_CLOCK
 	---help---
@@ -202,4 +203,11 @@ config AMD_XGBE_HAVE_ECC
 	bool
 	default n
 
+config BAIKAL_XGBE
+	bool "Baikal XGBE support"
+	default n
+	depends on AMD_XGBE
+	---help---
+	  Say Y to enable Baikal XGBE support
+
 endif # NET_VENDOR_AMD
diff --git a/drivers/net/ethernet/amd/xgbe/Makefile b/drivers/net/ethernet/amd/xgbe/Makefile
index 620785ffbd519..24e21b66dd14f 100644
--- a/drivers/net/ethernet/amd/xgbe/Makefile
+++ b/drivers/net/ethernet/amd/xgbe/Makefile
@@ -7,6 +7,7 @@ amd-xgbe-objs := xgbe-main.o xgbe-drv.o xgbe-dev.o \
 		 xgbe-i2c.o xgbe-phy-v1.o xgbe-phy-v2.o \
 		 xgbe-platform.o
 
+amd-xgbe-$(CONFIG_BAIKAL_XGBE) += baikal-mdio.o
 amd-xgbe-$(CONFIG_PCI) += xgbe-pci.o
 amd-xgbe-$(CONFIG_AMD_XGBE_DCB) += xgbe-dcb.o
 amd-xgbe-$(CONFIG_DEBUG_FS) += xgbe-debugfs.o
diff --git a/drivers/net/ethernet/amd/xgbe/baikal-mdio.c b/drivers/net/ethernet/amd/xgbe/baikal-mdio.c
new file mode 100644
index 0000000000000..cbdae38ef5490
--- /dev/null
+++ b/drivers/net/ethernet/amd/xgbe/baikal-mdio.c
@@ -0,0 +1,647 @@
+/*
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/mdio.h>
+#include <linux/phy.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/of_device.h>
+#include <linux/of_mdio.h>
+#include <linux/bitops.h>
+#include <linux/jiffies.h>
+#include <linux/clk.h>
+
+#include "xgbe.h"
+#include "xgbe-common.h"
+
+#ifndef VR_XS_PMA_MII_Gen5_MPLL_CTRL
+#define VR_XS_PMA_MII_Gen5_MPLL_CTRL			0x807a
+#endif
+#define VR_XS_PMA_MII_Gen5_MPLL_CTRL_REF_CLK_SEL_bit	(1 << 13)
+#define VR_XS_PCS_DIG_CTRL1				0x8000
+#define VR_XS_PCS_DIG_CTRL1_VR_RST_Bit			MDIO_CTRL1_RESET
+#define SR_XC_or_PCS_MMD_Control1			MDIO_CTRL1
+#define SR_XC_or_PCS_MMD_Control1_RST_Bit		MDIO_CTRL1_RESET
+#define DWC_GLBL_PLL_MONITOR				0x8010
+#define SDS_PCS_CLOCK_READY_mask			0x1c
+#define SDS_PCS_CLOCK_READY_bit				0x10
+#define VR_XS_PMA_MII_ENT_GEN5_GEN_CTL			0x809c
+#define VR_XS_PMA_MII_ENT_GEN5_GEN_CTL_LANE_MODE_KX4	(4 << 0)
+#define VR_XS_PMA_MII_ENT_GEN5_GEN_CTL_LANE_MODE_MASK	0x0007
+#define VR_XS_PMA_MII_ENT_GEN5_GEN_CTL_LINK_WIDTH_4	(2 << 8)
+#define VR_XS_PMA_MII_ENT_GEN5_GEN_CTL_LINK_WIDTH_MASK	0x0700
+#define VR_XS_OR_PCS_MMD_DIGITAL_CTL1_VR_RST		(1 << 15)
+
+#define DELAY_COUNT	50
+
+static int baikal_xgbe_an_restart_kr_training(struct xgbe_prv_data *pdata)
+{
+	int reg = 0;
+
+	DBGPR("%s\n", __FUNCTION__);
+
+	/* Restart training */
+	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, 0x0096, 3);
+	msleep(500);
+	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, 0x0096, 1);
+
+	/* The worse case when training continue till 500ms */
+	msleep(500);
+
+	reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, 0x0097);
+	/* Check training failure */
+	if (reg & (1 << 3))
+		return -1;
+
+	/* Success */
+	return 0;
+}
+
+static int baikal_xgbe_an_enable_kr_training(struct xgbe_prv_data *pdata)
+{
+	DBGPR("%s\n", __FUNCTION__);
+
+	/* Enable training */
+	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, 0x0096, 2);
+	return 0;
+}
+
+static int baikal_xgbe_phy_pcs_power_cycle(struct xgbe_prv_data *pdata)
+{
+	int ret;
+
+	DBGPR("%s\n", __FUNCTION__);
+
+	ret = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+	ret |= MDIO_CTRL1_LPOWER;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, ret);
+
+	usleep_range(75, 100);
+
+	ret &= ~MDIO_CTRL1_LPOWER;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, ret);
+	return 0;
+}
+
+static int baikal_xgbe_phy_xgmii_mode_kx4(struct xgbe_prv_data *pdata)
+{
+	int ret, count;
+
+	DBGPR_MDIO("%s\n", __FUNCTION__);
+
+	/* Write 2'b01 to Bits[1:0] of SR PCS Control2 to set the xpcx_kr_0
+	 * output to 0.
+	 */
+	ret = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2);
+	ret &= ~MDIO_PCS_CTRL2_TYPE;
+	ret |= MDIO_PCS_CTRL2_10GBX;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, ret);
+
+	/* Set Bit 13 SR PMA MMD Control1 Register (for back plane) to 1. */
+	ret = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_CTRL1);
+	ret |= 0x2000;
+	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_CTRL1, ret);
+
+	/* Set LANE_MODE TO KX4 (4). */
+	ret = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, VR_XS_PMA_MII_ENT_GEN5_GEN_CTL);
+	ret &= ~VR_XS_PMA_MII_ENT_GEN5_GEN_CTL_LANE_MODE_MASK;
+	ret |= VR_XS_PMA_MII_ENT_GEN5_GEN_CTL_LANE_MODE_KX4;
+	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, VR_XS_PMA_MII_ENT_GEN5_GEN_CTL, ret);
+
+	/* Set LANE_WIDTH (2) 4 lanes per link. */
+	ret = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, VR_XS_PMA_MII_ENT_GEN5_GEN_CTL);
+	ret &= ~VR_XS_PMA_MII_ENT_GEN5_GEN_CTL_LINK_WIDTH_MASK;
+	ret |= VR_XS_PMA_MII_ENT_GEN5_GEN_CTL_LINK_WIDTH_4;
+	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, VR_XS_PMA_MII_ENT_GEN5_GEN_CTL, ret);
+
+	/* Initiate Software Reset. */
+	ret = XMDIO_READ(pdata, MDIO_MMD_PCS, VR_XS_PCS_DIG_CTRL1);
+	ret |= VR_XS_OR_PCS_MMD_DIGITAL_CTL1_VR_RST;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, VR_XS_PCS_DIG_CTRL1, ret);
+
+	/* Wait until reset done. */
+	count = DELAY_COUNT;
+	do {
+		msleep(20);
+		ret = XMDIO_READ(pdata, MDIO_MMD_PCS, VR_XS_PCS_DIG_CTRL1);
+	} while (!!(ret & VR_XS_OR_PCS_MMD_DIGITAL_CTL1_VR_RST) && --count);
+
+	if (ret & VR_XS_OR_PCS_MMD_DIGITAL_CTL1_VR_RST)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static int baikal_xgbe_phy_xgmii_mode_kr(struct xgbe_prv_data *pdata)
+{
+	int ret;
+	DBGPR("%s\n", __FUNCTION__);
+
+	/* Enable KR training */
+	ret = baikal_xgbe_an_enable_kr_training(pdata);
+	if (ret < 0)
+		return ret;
+
+	/* Set PCS to KR/10G speed */
+	ret = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2);
+	ret &= ~MDIO_PCS_CTRL2_TYPE;
+	ret |= MDIO_PCS_CTRL2_10GBR;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, ret);
+
+	ret = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+	ret &= ~MDIO_CTRL1_SPEEDSEL;
+	ret |= MDIO_CTRL1_SPEED10G;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, ret);
+
+	ret = baikal_xgbe_phy_pcs_power_cycle(pdata);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int baikal_xgbe_phy_xgmii_mode(struct xgbe_prv_data *pdata)
+{
+	struct device *dev = pdata->dev;
+	char mode[32];
+	const char *pm = mode;
+
+	if (!device_property_read_string(dev, "be,pcs-mode", &pm)) {
+		if (strcasecmp(pm, "KX4") == 0) {
+			DBGPR("xgbe: mode KX4 = 0x%X function: %s\n", mode, __FUNCTION__);
+			return baikal_xgbe_phy_xgmii_mode_kx4(pdata);
+		}
+	}
+
+	DBGPR("xgbe: mode KR = 0x%X function: %s\n", mode, __FUNCTION__);
+	return baikal_xgbe_phy_xgmii_mode_kr(pdata);
+}
+
+static int __maybe_unused baikal_xgbe_phy_soft_reset(struct xgbe_prv_data *pdata)
+{
+	int count, ret;
+	DBGPR("%s\n", __FUNCTION__);
+
+	ret = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+	ret |= MDIO_CTRL1_RESET;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, ret);
+
+	count = DELAY_COUNT;
+	do {
+		msleep(20);
+		ret = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+		if (ret < 0)
+			return ret;
+	} while ((ret & MDIO_CTRL1_RESET) && --count);
+
+	if (ret & MDIO_CTRL1_RESET)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static int baikal_xgbe_phy_config_aneg(struct xgbe_prv_data *pdata)
+{
+	int reg;
+
+	DBGPR("%s\n", __FUNCTION__);
+
+	pdata->link_check = jiffies;
+	reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1);
+
+	/* Disable auto negotiation in any case! */
+	reg &= ~MDIO_AN_CTRL1_ENABLE;
+	pdata->phy.autoneg = AUTONEG_DISABLE;
+
+	XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_CTRL1, reg);
+	return 0;
+}
+
+static int ext_phy_probe(struct device *dev, struct phy_device **phy_dev)
+{
+	struct phy_device *phydev = NULL;
+	int ret;
+
+	if (acpi_disabled) {
+		struct device_node *xmit_node;
+
+		/* Retrieve the xmit-handle */
+		xmit_node = of_parse_phandle(dev->of_node, "ext-phy-handle", 0);
+		if (!xmit_node)
+			return -ENODEV;
+
+		phydev = of_phy_find_device(xmit_node);
+		if (!phydev)
+			return -ENODEV;
+		} else {
+			struct device *d;
+			struct fwnode_handle *fwnode;
+			struct mdio_device *mdiodev;
+
+			fwnode = fwnode_find_reference(dev->fwnode, "ext-phy-handle", 0);
+		if (!fwnode)
+			return -ENODEV;
+
+		d = bus_find_device_by_fwnode(&mdio_bus_type, fwnode);
+		if (d) {
+			mdiodev = to_mdio_device(d);
+			if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY)
+				phydev = to_phy_device(d);
+			else
+				put_device(d);
+		}
+
+		if (!phydev)
+			return -ENODEV;
+	}
+
+	ret = phy_init_hw(phydev);
+	if (ret < 0)
+		return ret;
+
+	if ((phydev->speed != SPEED_10000) && (phydev->duplex != DUPLEX_FULL))
+		return -ENODEV;
+
+	*phy_dev = phydev;
+	return 0;
+}
+
+int baikal_xgbe_phy_config_init(struct xgbe_prv_data *pdata)
+{
+	int ret = 0;
+	int count = DELAY_COUNT;
+
+	DBGPR("%s\n", __FUNCTION__);
+
+	if (!ext_phy_probe(&pdata->platdev->dev, &pdata->phydev)) {
+		netdev_info(pdata->netdev, "use external PHY\n");
+
+		/* Initialize supported features */
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+				pdata->phydev->supported, 1);
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
+				pdata->phydev->supported, 1);
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
+				pdata->phydev->supported, 1);
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_Backplane_BIT,
+				pdata->phydev->supported, 1);
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
+				pdata->phydev->supported, 1);
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
+				pdata->phydev->supported, 1);
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_Pause_BIT,
+				pdata->phydev->supported, 1);
+		linkmode_copy(pdata->phydev->advertising, pdata->phydev->supported);
+	} else {
+		netdev_info(pdata->netdev, "no one external PHY found\n");
+	}
+
+	pdata->phy.pause_autoneg = 0;
+	pdata->phy.tx_pause = 0;
+	pdata->phy.rx_pause = 0;
+
+	/* Switch XGMAC PHY PLL to use external ref clock from pad */
+	ret = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, VR_XS_PMA_MII_Gen5_MPLL_CTRL);
+	ret &= ~(VR_XS_PMA_MII_Gen5_MPLL_CTRL_REF_CLK_SEL_bit);
+	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, VR_XS_PMA_MII_Gen5_MPLL_CTRL, ret);
+	wmb();
+
+	/* Make vendor specific soft reset */
+	ret = XMDIO_READ(pdata, MDIO_MMD_PCS, VR_XS_PCS_DIG_CTRL1);
+	ret |= VR_XS_PCS_DIG_CTRL1_VR_RST_Bit;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, VR_XS_PCS_DIG_CTRL1, ret);
+	wmb();
+
+	/* Wait reset finish */
+	count = DELAY_COUNT;
+	do {
+		usleep_range(500, 600);
+		ret = XMDIO_READ(pdata, MDIO_MMD_PCS, VR_XS_PCS_DIG_CTRL1);
+	} while (((ret & VR_XS_PCS_DIG_CTRL1_VR_RST_Bit) != 0) && count--);
+
+	DBGPR("%s %x\n", __FUNCTION__, ret);
+	/*
+	 * Wait for the RST (bit 15) of the "SR XS or PCS MMD Control1" Register is 0.
+	 * This bit is self-cleared when Bits[4:2] in VR XS or PCS MMD Digital
+	 * Status Register are equal to 3'b100, that is, Tx/Rx clocks are stable
+	 * and in Power_Good state.
+	 */
+	count = DELAY_COUNT;
+	do {
+		usleep_range(500, 600);
+		ret = XMDIO_READ(pdata, MDIO_MMD_PCS, SR_XC_or_PCS_MMD_Control1);
+	} while (((ret & SR_XC_or_PCS_MMD_Control1_RST_Bit) != 0) && count--);
+
+	/*
+	 * This bit is self-cleared when Bits[4:2] in VR XS or PCS MMD Digital
+	 * Status Register are equal to 3'b100, that is, Tx/Rx clocks are stable
+	 * and in Power_Good state.
+	 */
+	count = DELAY_COUNT;
+	do {
+		usleep_range(500, 600);
+		ret = XMDIO_READ(pdata, MDIO_MMD_PCS, DWC_GLBL_PLL_MONITOR);
+	} while (((ret & SDS_PCS_CLOCK_READY_mask) != SDS_PCS_CLOCK_READY_bit) && count-- );
+
+	/* Turn off and clear interrupts */
+	XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0);
+	XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0);
+	wmb();
+
+	baikal_xgbe_phy_config_aneg(pdata);
+
+	ret = baikal_xgbe_phy_xgmii_mode(pdata);
+
+	count = DELAY_COUNT;
+	do {
+		msleep(10);
+		ret = XMDIO_READ(pdata, MDIO_MMD_PCS, 0x0001);
+	} while (((ret & 0x0004) != 0x0004) && count--);
+
+	return 0;
+}
+
+static int baikal_xgbe_phy_aneg_done(struct xgbe_prv_data *pdata)
+{
+	int reg;
+
+	DBGPR("%s\n", __FUNCTION__);
+
+	reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_STAT1);
+	return (reg & MDIO_AN_STAT1_COMPLETE) ? 1 : 0;
+}
+
+static int baikal_xgbe_phy_update_link(struct xgbe_prv_data *pdata)
+{
+	int new_state = 0;
+	int ret = 0;
+	struct phy_device *phydev;
+
+	if (!pdata || !pdata->phydev)
+		return 1;
+
+	phydev = pdata->phydev;
+	ret = phy_read_mmd(phydev, MDIO_MMD_PHYXS, 0x1001);
+
+	if (pdata->phy.link) {
+		/* Flow control support */
+		pdata->pause_autoneg = pdata->phy.pause_autoneg;
+
+		if (pdata->tx_pause != pdata->phy.tx_pause) {
+			new_state = 1;
+			pdata->hw_if.config_tx_flow_control(pdata);
+			pdata->tx_pause = pdata->phy.tx_pause;
+		}
+
+		if (pdata->rx_pause != pdata->phy.rx_pause) {
+			new_state = 1;
+			pdata->hw_if.config_rx_flow_control(pdata);
+			pdata->rx_pause = pdata->phy.rx_pause;
+		}
+
+		/* Speed support */
+		if (pdata->phy_speed != pdata->phy.speed) {
+			new_state = 1;
+			pdata->phy_speed = pdata->phy.speed;
+		}
+
+		if (pdata->phy_link != pdata->phy.link) {
+			new_state = 1;
+			pdata->phy_link = pdata->phy.link;
+		}
+	} else if (pdata->phy_link) {
+		new_state = 1;
+		pdata->phy_link = 0;
+		pdata->phy_speed = SPEED_UNKNOWN;
+	}
+
+	return 0;
+}
+
+static void baikal_xgbe_phy_read_status(struct xgbe_prv_data *pdata)
+{
+	int reg, link_aneg;
+	int old_link = pdata->phy.link;
+
+	pdata->phy.link = 1;
+
+	if (test_bit(XGBE_LINK_ERR, &pdata->dev_state)) {
+		netif_carrier_off(pdata->netdev);
+
+		pdata->phy.link = 0;
+		goto update_link;
+	}
+
+	link_aneg = (pdata->phy.autoneg == AUTONEG_ENABLE);
+
+	/* If there is no external PHY just skip it */
+	if (pdata->phydev) {
+		pdata->phydev->drv->read_status(pdata->phydev);
+		/* Pop out old values */
+		pdata->phydev->drv->read_status(pdata->phydev);
+		if (!pdata->phydev->link) {
+			pdata->phydev->link = 0;
+			pdata->phy.link &= pdata->phydev->link;
+		}
+	}
+
+	/* Check Baikal PHY link */
+	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
+	pdata->phy.link &= (reg & MDIO_STAT1_LSTATUS) ? 1 : 0;
+
+	reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_STAT1);
+	pdata->phy.link &= (reg & MDIO_STAT1_LSTATUS) ? 1 : 0;
+
+	/* If nothing has happened with a link */
+	if ((old_link && pdata->phy.link) || (!old_link && !pdata->phy.link))
+		goto update_link;
+
+	if (pdata->phy.link) {
+		if (link_aneg && !baikal_xgbe_phy_aneg_done(pdata)) {
+			return;
+		}
+
+		if (test_bit(XGBE_LINK_INIT, &pdata->dev_state))
+			clear_bit(XGBE_LINK_INIT, &pdata->dev_state);
+
+		netif_carrier_on(pdata->netdev);
+		netdev_info(pdata->netdev, "NIC Link is Up\n");
+	} else {
+		netif_carrier_off(pdata->netdev);
+		netdev_info(pdata->netdev, "NIC Link is Down\n");
+
+		/* If KX4 mode is enabled training doesn't affect behavior */
+		baikal_xgbe_an_restart_kr_training(pdata);
+		/* Pop out old values */
+		XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
+		XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_STAT1);
+	}
+
+update_link:
+	baikal_xgbe_phy_update_link(pdata);
+}
+
+static void baikal_xgbe_phy_stop(struct xgbe_prv_data *pdata)
+{
+	netif_dbg(pdata, link, pdata->netdev, "stopping PHY\n");
+
+	/* Disable auto-negotiation interrupts */
+	XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0);
+
+	pdata->phy.link = 0;
+	netif_carrier_off(pdata->netdev);
+	baikal_xgbe_phy_update_link(pdata);
+}
+
+/**
+ * baikal_xgbe_phy_start() - dummy
+ */
+int baikal_xgbe_phy_start(struct xgbe_prv_data *pdata)
+{
+	pdata->phy.link = 0;
+	netif_carrier_off(pdata->netdev);
+	return 0;
+}
+
+/**
+ * baikal_xgbe_phy_exit() - dummy
+ */
+void baikal_xgbe_phy_exit(struct xgbe_prv_data *pdata)
+{
+	return;
+}
+
+/**
+ * baikal_an_isr() - dummy
+ */
+irqreturn_t baikal_an_isr(struct xgbe_prv_data *pdata)
+{
+	return IRQ_HANDLED;
+}
+
+void xgbe_init_function_ptrs_phy_baikal(struct xgbe_phy_if *phy_if)
+{
+	phy_if->phy_init	= baikal_xgbe_phy_config_init;
+	phy_if->phy_exit	= baikal_xgbe_phy_exit;
+	phy_if->phy_reset	= baikal_xgbe_phy_soft_reset;
+	phy_if->phy_stop	= baikal_xgbe_phy_stop;
+	phy_if->phy_status	= baikal_xgbe_phy_read_status;
+	phy_if->phy_config_aneg	= baikal_xgbe_phy_config_aneg;
+	phy_if->phy_start	= baikal_xgbe_phy_start;
+	phy_if->an_isr		= baikal_an_isr;
+}
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
index 230726d7b74f6..ebd1985c36145 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
@@ -366,8 +366,13 @@ static int xgbe_map_rx_buffer(struct xgbe_prv_data *pdata,
 	}
 
 	if (!ring->rx_buf_pa.pages) {
+#ifdef CONFIG_BAIKAL_XGBE
+		ret = xgbe_alloc_pages(pdata, &ring->rx_buf_pa,
+				       0, ring->node);
+#else
 		ret = xgbe_alloc_pages(pdata, &ring->rx_buf_pa,
 				       PAGE_ALLOC_COSTLY_ORDER, ring->node);
+#endif
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index decc1c09a031b..01f4d2d7e58e6 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -696,7 +696,9 @@ static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata)
 			 *          per channel interrupts in edge triggered
 			 *          mode)
 			 */
+#ifndef CONFIG_BAIKAL_XGBE
 			if (!pdata->per_channel_irq || pdata->channel_irq_mode)
+#endif
 				XGMAC_SET_BITS(channel->curr_ier,
 					       DMA_CH_IER, TIE, 1);
 		}
@@ -708,7 +710,9 @@ static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata)
 			 *          mode)
 			 */
 			XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RBUE, 1);
+#ifndef CONFIG_BAIKAL_XGBE
 			if (!pdata->per_channel_irq || pdata->channel_irq_mode)
+#endif
 				XGMAC_SET_BITS(channel->curr_ier,
 					       DMA_CH_IER, RIE, 1);
 		}
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 7f705483c1c57..035122c1bc585 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -154,6 +154,57 @@ module_param(ecc_ded_period, uint, 0644);
 MODULE_PARM_DESC(ecc_ded_period, " ECC detected error period (in seconds)");
 #endif
 
+#ifdef CONFIG_BAIKAL_XGBE
+static void xgbe_validate(struct phylink_config *config,
+                            unsigned long *supported,
+                            struct phylink_link_state *state)
+{
+        /* Dummy */
+}
+
+static int xgbe_mac_link_state(struct phylink_config *config,
+                                 struct phylink_link_state *state)
+{
+        /* Dummy */
+        state->link = 0;
+        return 1;
+}
+
+
+static void xgbe_mac_an_restart(struct phylink_config *config)
+{
+        /* Dummy */
+}
+
+static void xgbe_mac_config(struct phylink_config *config, unsigned int mode,
+                              const struct phylink_link_state *state)
+{
+        /* Dummy */
+}
+
+static void xgbe_mac_link_down(struct phylink_config *config,
+                                 unsigned int mode, phy_interface_t interface)
+{
+        /* Dummy */
+}
+
+static void xgbe_mac_link_up(struct phylink_config *config, unsigned int mode,
+                               phy_interface_t interface,
+                               struct phy_device *phy)
+{
+        /* Dummy */
+}
+
+const struct phylink_mac_ops xgbe_phylink_ops = {
+        .validate = xgbe_validate,
+        .mac_link_state = xgbe_mac_link_state,
+        .mac_an_restart = xgbe_mac_an_restart,
+        .mac_config = xgbe_mac_config,
+        .mac_link_down = xgbe_mac_link_down,
+        .mac_link_up = xgbe_mac_link_up,
+};
+#endif
+
 static int xgbe_one_poll(struct napi_struct *, int);
 static int xgbe_all_poll(struct napi_struct *, int);
 static void xgbe_stop(struct xgbe_prv_data *);
@@ -216,8 +267,15 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
 		channel->node = node;
 		cpumask_set_cpu(cpu, &channel->affinity_mask);
 
+#ifndef CONFIG_BAIKAL_XGBE
 		if (pdata->per_channel_irq)
 			channel->dma_irq = pdata->channel_irq[i];
+#else
+		if (pdata->per_channel_irq) {
+			channel->tx_dma_irq = pdata->channel_tx_irq[i];
+			channel->rx_dma_irq = pdata->channel_rx_irq[i];
+		}
+#endif
 
 		if (i < pdata->tx_ring_count) {
 			ring = xgbe_alloc_node(sizeof(*ring), node);
@@ -244,10 +302,22 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
 		netif_dbg(pdata, drv, pdata->netdev,
 			  "%s: cpu=%u, node=%d\n", channel->name, cpu, node);
 
+#ifndef CONFIG_BAIKAL_XGBE
 		netif_dbg(pdata, drv, pdata->netdev,
 			  "%s: dma_regs=%p, dma_irq=%d, tx=%p, rx=%p\n",
 			  channel->name, channel->dma_regs, channel->dma_irq,
 			  channel->tx_ring, channel->rx_ring);
+#else
+		netif_dbg(pdata, drv, pdata->netdev,
+			  "%s: dma_regs=%p, tx_dma_irq=%d, tx=%p, rx=%p\n",
+			  channel->name, channel->dma_regs, channel->tx_dma_irq,
+			  channel->tx_ring, channel->rx_ring);
+
+		netif_dbg(pdata, drv, pdata->netdev,
+			  "%s: dma_regs=%p, rx_dma_irq=%d, tx=%p, rx=%p\n",
+			  channel->name, channel->dma_regs, channel->rx_dma_irq,
+			  channel->tx_ring, channel->rx_ring);
+#endif
 	}
 
 	pdata->channel_count = count;
@@ -623,10 +693,16 @@ static irqreturn_t xgbe_dma_isr(int irq, void *data)
 	 */
 	if (napi_schedule_prep(&channel->napi)) {
 		/* Disable Tx and Rx interrupts */
+#ifndef CONFIG_BAIKAL_XGBE
 		if (pdata->channel_irq_mode)
 			xgbe_disable_rx_tx_int(pdata, channel);
 		else
 			disable_irq_nosync(channel->dma_irq);
+#else
+		xgbe_disable_rx_tx_int(pdata, channel);
+		disable_irq_nosync(channel->tx_dma_irq);
+		disable_irq_nosync(channel->rx_dma_irq);
+#endif
 
 		/* Turn on polling */
 		__napi_schedule_irqoff(&channel->napi);
@@ -654,10 +730,17 @@ static void xgbe_tx_timer(struct timer_list *t)
 	if (napi_schedule_prep(napi)) {
 		/* Disable Tx and Rx interrupts */
 		if (pdata->per_channel_irq)
+#ifndef CONFIG_BAIKAL_XGBE
 			if (pdata->channel_irq_mode)
 				xgbe_disable_rx_tx_int(pdata, channel);
 			else
 				disable_irq_nosync(channel->dma_irq);
+#else
+		{
+				disable_irq_nosync(channel->tx_dma_irq);
+				disable_irq_nosync(channel->rx_dma_irq);
+		}
+#endif
 		else
 			xgbe_disable_rx_tx_ints(pdata);
 
@@ -1092,6 +1175,7 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
 	if (!pdata->per_channel_irq)
 		return 0;
 
+#ifndef CONFIG_BAIKAL_XGBE
 	for (i = 0; i < pdata->channel_count; i++) {
 		channel = pdata->channel[i];
 		snprintf(channel->dma_irq_name,
@@ -1112,6 +1196,61 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
 				      &channel->affinity_mask);
 	}
 
+	err_dma_irq:
+		/* Using an unsigned int, 'i' will go to UINT_MAX and exit */
+		for (i--; i < pdata->channel_count; i--) {
+			channel = pdata->channel[i];
+
+			irq_set_affinity_hint(channel->dma_irq, NULL);
+			devm_free_irq(pdata->dev, channel->dma_irq, channel);
+		}
+
+		if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq))
+			devm_free_irq(pdata->dev, pdata->ecc_irq, pdata);
+
+	err_dev_irq:
+		devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
+
+		return ret;
+#else
+	for (i = 0; i < pdata->channel_count; i++) {
+		channel = pdata->channel[i];
+		/* Tx */
+		snprintf(channel->tx_dma_irq_name,
+			 sizeof(channel->tx_dma_irq_name) - 1,
+			 "%s-Tx-%u", netdev_name(netdev),
+			 channel->queue_index);
+
+		ret = devm_request_irq(pdata->dev, channel->tx_dma_irq,
+				       xgbe_dma_isr, 0,
+				       channel->tx_dma_irq_name, channel);
+		if (ret) {
+			netdev_alert(netdev, "error requesting irq %d\n",
+				     channel->tx_dma_irq);
+			goto err_dma_irq;
+		}
+
+		irq_set_affinity_hint(channel->tx_dma_irq,
+						      &channel->affinity_mask);
+
+		/* Rx */
+		snprintf(channel->rx_dma_irq_name,
+			 sizeof(channel->rx_dma_irq_name) - 1,
+			 "%s-Rx-%u", netdev_name(netdev),
+			 channel->queue_index);
+
+		ret = devm_request_irq(pdata->dev, channel->rx_dma_irq,
+				       xgbe_dma_isr, 0,
+				       channel->rx_dma_irq_name, channel);
+		if (ret) {
+			netdev_alert(netdev, "error requesting irq %d\n",
+				     channel->rx_dma_irq);
+			goto err_dma_irq;
+		}
+
+		irq_set_affinity_hint(channel->rx_dma_irq,
+						      &channel->affinity_mask);
+	}
 	return 0;
 
 err_dma_irq:
@@ -1119,8 +1258,10 @@ err_dma_irq:
 	for (i--; i < pdata->channel_count; i--) {
 		channel = pdata->channel[i];
 
-		irq_set_affinity_hint(channel->dma_irq, NULL);
-		devm_free_irq(pdata->dev, channel->dma_irq, channel);
+		devm_free_irq(pdata->dev, channel->tx_dma_irq, channel);
+		devm_free_irq(pdata->dev, channel->rx_dma_irq, channel);
+		irq_set_affinity_hint(channel->tx_dma_irq, NULL);
+		irq_set_affinity_hint(channel->rx_dma_irq, NULL);
 	}
 
 	if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq))
@@ -1130,6 +1271,7 @@ err_dev_irq:
 	devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
 
 	return ret;
+#endif
 }
 
 static void xgbe_free_irqs(struct xgbe_prv_data *pdata)
@@ -1151,8 +1293,15 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata)
 	for (i = 0; i < pdata->channel_count; i++) {
 		channel = pdata->channel[i];
 
+#ifndef CONFIG_BAIKAL_XGBE
 		irq_set_affinity_hint(channel->dma_irq, NULL);
 		devm_free_irq(pdata->dev, channel->dma_irq, channel);
+#else
+		irq_set_affinity_hint(channel->tx_dma_irq, NULL);
+		irq_set_affinity_hint(channel->rx_dma_irq, NULL);
+		devm_free_irq(pdata->dev, channel->tx_dma_irq, channel);
+		devm_free_irq(pdata->dev, channel->rx_dma_irq, channel);
+#endif
 	}
 }
 
@@ -1233,10 +1382,14 @@ static void xgbe_free_rx_data(struct xgbe_prv_data *pdata)
 
 static int xgbe_phy_reset(struct xgbe_prv_data *pdata)
 {
+#ifndef CONFIG_BAIKAL_XGBE
 	pdata->phy_link = -1;
 	pdata->phy_speed = SPEED_UNKNOWN;
 
 	return pdata->phy_if.phy_reset(pdata);
+#else
+	return 0;
+#endif
 }
 
 int xgbe_powerdown(struct net_device *netdev, unsigned int caller)
@@ -1408,6 +1561,11 @@ static int xgbe_start(struct xgbe_prv_data *pdata)
 	if (ret)
 		goto err_irqs;
 
+#ifdef CONFIG_BAIKAL_XGBE
+	if (pdata->phylink)
+		phylink_start(pdata->phylink);
+#endif
+
 	hw_if->enable_tx(pdata);
 	hw_if->enable_rx(pdata);
 
@@ -1460,6 +1618,13 @@ static void xgbe_stop(struct xgbe_prv_data *pdata)
 
 	phy_if->phy_stop(pdata);
 
+#ifndef CONFIG_BAIKAL_XGBE
+	if (pdata->phylink) {
+		phylink_stop(pdata->phylink);
+		phylink_destroy(pdata->phylink);
+	}
+#endif
+
 	xgbe_free_irqs(pdata);
 
 	xgbe_napi_disable(pdata, 1);
@@ -2239,7 +2404,12 @@ static void xgbe_poll_controller(struct net_device *netdev)
 	if (pdata->per_channel_irq) {
 		for (i = 0; i < pdata->channel_count; i++) {
 			channel = pdata->channel[i];
+#ifndef CONFIG_BAIKAL_XGBE
 			xgbe_dma_isr(channel->dma_irq, channel);
+#else
+			xgbe_dma_isr(channel->tx_dma_irq, channel);
+			xgbe_dma_isr(channel->rx_dma_irq, channel);
+#endif
 		}
 	} else {
 		disable_irq(pdata->dev_irq);
@@ -2904,10 +3074,16 @@ static int xgbe_one_poll(struct napi_struct *napi, int budget)
 	/* If we processed everything, we are done */
 	if ((processed < budget) && napi_complete_done(napi, processed)) {
 		/* Enable Tx and Rx interrupts */
+#ifndef CONFIG_BAIKAL_XGBE
 		if (pdata->channel_irq_mode)
 			xgbe_enable_rx_tx_int(pdata, channel);
 		else
 			enable_irq(channel->dma_irq);
+#else
+		xgbe_enable_rx_tx_int(pdata, channel);
+		enable_irq(channel->tx_dma_irq);
+		enable_irq(channel->rx_dma_irq);
+#endif
 	}
 
 	DBGPR("<--xgbe_one_poll: received = %d\n", processed);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index 7ce9c69e9c44f..5192e41247170 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -121,6 +121,9 @@
 #include <linux/etherdevice.h>
 #include <linux/io.h>
 #include <linux/notifier.h>
+#ifdef CONFIG_BAIKAL_XGBE
+#include <linux/clk.h>
+#endif
 
 #include "xgbe.h"
 #include "xgbe-common.h"
@@ -142,7 +145,11 @@ static void xgbe_default_config(struct xgbe_prv_data *pdata)
 	DBGPR("-->xgbe_default_config\n");
 
 	pdata->blen = DMA_SBMR_BLEN_64;
-	pdata->pbl = DMA_PBL_128;
+#ifdef CONFIG_BAIKAL_XGBE
+	pdata->pbl = DMA_PBL_16;
+#else
+	pdata->pbl = DMA_PBL_256;
+#endif
 	pdata->aal = 1;
 	pdata->rd_osr_limit = 8;
 	pdata->wr_osr_limit = 8;
@@ -330,6 +337,19 @@ int xgbe_config_netdev(struct xgbe_prv_data *pdata)
 	XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, TCP4TE, 1);
 	XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, UDP4TE, 1);
 
+#ifdef CONFIG_BAIKAL_XGBE
+	ret = clk_prepare_enable(pdata->sysclk);
+	if (ret) {
+		netdev_alert(netdev, "gmac clk_prepare_enable failed\n");
+		return ret;
+	}
+
+	ret = clk_prepare_enable(pdata->ptpclk);
+	if (ret) {
+		netdev_alert(netdev, "dma clk_prepare_enable failed\n");
+		return ret;
+	}
+#endif
 	/* Call MDIO/PHY initialization routine */
 	pdata->debugfs_an_cdr_workaround = pdata->vdata->an_cdr_workaround;
 	ret = pdata->phy_if.phy_init(pdata);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
index 4ebd2410185a9..f15e1facc3fdd 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
@@ -131,9 +131,17 @@
 #include <linux/acpi.h>
 #include <linux/mdio.h>
 
+#ifdef CONFIG_BAIKAL_XGBE
+#include <linux/phy.h>
+#endif
+
 #include "xgbe.h"
 #include "xgbe-common.h"
 
+#ifdef CONFIG_BAIKAL_XGBE
+extern const struct phylink_mac_ops xgbe_phylink_ops;
+#endif
+
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id xgbe_acpi_match[];
 
@@ -306,6 +314,9 @@ static int xgbe_platform_probe(struct platform_device *pdev)
 	unsigned int dma_irqnum, dma_irqend;
 	enum dev_dma_attr attr;
 	int ret;
+#ifdef CONFIG_BAIKAL_XGBE
+	struct fwnode_reference_args ref;
+#endif
 
 	pdata = xgbe_alloc_pdata(dev);
 	if (IS_ERR(pdata)) {
@@ -323,6 +334,27 @@ static int xgbe_platform_probe(struct platform_device *pdev)
 	/* Get the version data */
 	pdata->vdata = xgbe_get_vdata(pdata);
 
+#ifdef CONFIG_BAIKAL_XGBE
+	ret = fwnode_property_get_reference_args(pdev->dev.fwnode, "sfp", NULL,
+	                                         0, 0, &ref);
+
+	if (ret == 0) {
+		pdata->phylink_phy_mode = PHY_INTERFACE_MODE_10GKR;
+
+		pdata->phylink_config.dev = &pdata->netdev->dev;
+		pdata->phylink_config.type = PHYLINK_NETDEV;
+
+		/* Create an instance, bind with SFP, etc. */
+		pdata->phylink = phylink_create(&pdata->phylink_config,
+			pdev->dev.fwnode, pdata->phylink_phy_mode,
+			&xgbe_phylink_ops);
+
+		if (IS_ERR(pdata->phylink))
+			dev_info(pdata->dev, "cat't create phylink instance (%ld)\n",
+				PTR_ERR(pdata->phylink));
+	}
+#endif
+
 	phy_pdev = xgbe_get_phy_pdev(pdata);
 	if (!phy_pdev) {
 		dev_err(dev, "unable to obtain phy device\n");
@@ -370,6 +402,7 @@ static int xgbe_platform_probe(struct platform_device *pdev)
 	if (netif_msg_probe(pdata))
 		dev_dbg(dev, "xpcs_regs  = %p\n", pdata->xpcs_regs);
 
+#ifndef CONFIG_BAIKAL_XGBE
 	pdata->rxtx_regs = devm_platform_ioremap_resource(phy_pdev,
 							  phy_memnum++);
 	if (IS_ERR(pdata->rxtx_regs)) {
@@ -399,6 +432,7 @@ static int xgbe_platform_probe(struct platform_device *pdev)
 	}
 	if (netif_msg_probe(pdata))
 		dev_dbg(dev, "sir1_regs  = %p\n", pdata->sir1_regs);
+#endif
 
 	/* Retrieve the MAC address */
 	ret = device_property_read_u8_array(dev, XGBE_MAC_ADDR_PROPERTY,
@@ -425,7 +459,11 @@ static int xgbe_platform_probe(struct platform_device *pdev)
 	/* Check for per channel interrupt support */
 	if (device_property_present(dev, XGBE_DMA_IRQS_PROPERTY)) {
 		pdata->per_channel_irq = 1;
+#ifndef CONFIG_BAIKAL_XGBE
 		pdata->channel_irq_mode = XGBE_IRQ_MODE_EDGE;
+#else
+		pdata->channel_irq_mode = XGBE_IRQ_MODE_LEVEL;
+#endif
 	}
 
 	/* Obtain device settings unique to ACPI/OF */
@@ -467,8 +505,12 @@ static int xgbe_platform_probe(struct platform_device *pdev)
 	if (ret < 0)
 		goto err_io;
 	pdata->dev_irq = ret;
+#ifdef CONFIG_BAIKAL_XGBE
+	pdata->an_irq = pdata->dev_irq;
+#endif
 
 	/* Get the per channel DMA interrupts */
+#ifndef CONFIG_BAIKAL_XGBE
 	if (pdata->per_channel_irq) {
 		unsigned int i, max = ARRAY_SIZE(pdata->channel_irq);
 
@@ -484,12 +526,39 @@ static int xgbe_platform_probe(struct platform_device *pdev)
 
 		pdata->irq_count += max;
 	}
+#else
+	if (pdata->per_channel_irq) {
+		int i;	
+		unsigned int max = ARRAY_SIZE(pdata->channel_tx_irq);
+
+		/* Tx */
+		for (i = 0; i < pdata->tx_max_channel_count; ++i) {
+			ret = platform_get_irq(pdata->platdev, i + 1);
+			if (ret < 0)
+				goto err_io;
+			pdata->channel_tx_irq[i] = ret;	
+		}
+
+		/* Rx */
+		for (i = 0; i < pdata->rx_max_channel_count; ++i) {
+			ret = platform_get_irq(pdata->platdev, i + 9);
+			if (ret < 0)
+				goto err_io;
+			pdata->channel_rx_irq[i] = ret;	
+		}	
 
+		pdata->channel_irq_count = max;
+		pdata->irq_count += max;
+	}
+#endif
+
+#ifndef CONFIG_BAIKAL_XGBE
 	/* Get the auto-negotiation interrupt */
 	ret = platform_get_irq(phy_pdev, phy_irqnum++);
 	if (ret < 0)
 		goto err_io;
 	pdata->an_irq = ret;
+#endif
 
 	/* Configure the netdev resource */
 	ret = xgbe_config_netdev(pdata);
@@ -573,7 +642,11 @@ static int xgbe_platform_resume(struct device *dev)
 #endif /* CONFIG_PM_SLEEP */
 
 static const struct xgbe_version_data xgbe_v1 = {
+#ifdef CONFIG_BAIKAL_XGBE
+	.init_function_ptrs_phy_impl	= xgbe_init_function_ptrs_phy_baikal,
+#else
 	.init_function_ptrs_phy_impl	= xgbe_init_function_ptrs_phy_v1,
+#endif
 	.xpcs_access			= XGBE_XPCS_ACCESS_V1,
 	.tx_max_fifo_size		= 81920,
 	.rx_max_fifo_size		= 81920,
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 729307a96c50d..67d5d83936e2c 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -133,6 +133,9 @@
 #include <linux/dcache.h>
 #include <linux/ethtool.h>
 #include <linux/list.h>
+#ifdef CONFIG_BAIKAL_XGBE
+#include <linux/phylink.h>
+#endif
 
 #define XGBE_DRV_NAME		"amd-xgbe"
 #define XGBE_DRV_VERSION	"1.0.3"
@@ -209,6 +212,9 @@
 #define XGBE_SPEEDSET_PROPERTY	"amd,speed-set"
 
 /* Device-tree clock names */
+#ifdef CONFIG_BAIKAL_XGBE
+#define XGBE_AXI_CLOCK          "axi"
+#endif
 #define XGBE_DMA_CLOCK		"dma_clk"
 #define XGBE_PTP_CLOCK		"ptp_clk"
 
@@ -504,8 +510,15 @@ struct xgbe_channel {
 	void __iomem *dma_regs;
 
 	/* Per channel interrupt irq number */
+#ifndef CONFIG_BAIKAL_XGBE
 	int dma_irq;
 	char dma_irq_name[IFNAMSIZ + 32];
+#else
+	int rx_dma_irq;
+	int tx_dma_irq;
+	char rx_dma_irq_name[IFNAMSIZ + 32];
+	char tx_dma_irq_name[IFNAMSIZ + 32];
+#endif
 
 	/* Netdev related settings */
 	struct napi_struct napi;
@@ -1031,6 +1044,15 @@ struct xgbe_prv_data {
 	struct platform_device *phy_platdev;
 	struct device *phy_dev;
 
+#ifdef CONFIG_BAIKAL_XGBE
+	/* phydevice - tranciever */
+	struct phy_device *phydev;
+
+	struct phylink *phylink;
+	struct phylink_config phylink_config;
+	int phylink_phy_mode;
+#endif
+
 	/* Version related data */
 	struct xgbe_version_data *vdata;
 
@@ -1089,6 +1111,10 @@ struct xgbe_prv_data {
 	int ecc_irq;
 	int i2c_irq;
 	int channel_irq[XGBE_MAX_DMA_CHANNELS];
+#ifdef CONFIG_BAIKAL_XGBE
+	int channel_tx_irq[XGBE_MAX_DMA_CHANNELS];
+	int channel_rx_irq[XGBE_MAX_DMA_CHANNELS];
+#endif
 
 	unsigned int per_channel_irq;
 	unsigned int irq_count;
@@ -1329,6 +1355,10 @@ const struct ethtool_ops *xgbe_get_ethtool_ops(void);
 const struct dcbnl_rtnl_ops *xgbe_get_dcbnl_ops(void);
 #endif
 
+#ifdef CONFIG_BAIKAL_XGBE
+void xgbe_init_function_ptrs_phy_baikal(struct xgbe_phy_if *);
+#endif
+
 void xgbe_ptp_register(struct xgbe_prv_data *);
 void xgbe_ptp_unregister(struct xgbe_prv_data *);
 void xgbe_dump_tx_desc(struct xgbe_prv_data *, struct xgbe_ring *,
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 338e25a6374e9..b7af9c386810d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -65,6 +65,14 @@ config DWMAC_ANARION
 
 	  This selects the Anarion SoC glue layer support for the stmmac driver.
 
+config DWMAC_BAIKAL
+	tristate "Baikal Electronics DWMAC support"
+	depends on OF
+	help
+	  Support for Baikal Electronics DWMAC Ethernet.
+
+	  This selects the Baikal SoC glue layer support for the stmmac driver.
+
 config DWMAC_IPQ806X
 	tristate "QCA IPQ806x DWMAC support"
 	default ARCH_QCOM
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index c59926d96bccc..1b7c8cbf1f38c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -5,7 +5,7 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o	\
 	      dwmac100_core.o dwmac100_dma.o enh_desc.o norm_desc.o	\
 	      mmc_core.o stmmac_hwtstamp.o stmmac_ptp.o dwmac4_descs.o	\
 	      dwmac4_dma.o dwmac4_lib.o dwmac4_core.o dwmac5.o hwif.o \
-	      stmmac_tc.o dwxgmac2_core.o dwxgmac2_dma.o dwxgmac2_descs.o \
+	      stmmac_tc.o dwxgmac2_core.o dwxgmac2_dma.o dwxgmac2_descs.o stmmac_xsk.o\
 	      $(stmmac-y)
 
 stmmac-$(CONFIG_STMMAC_SELFTESTS) += stmmac_selftests.o
@@ -13,6 +13,7 @@ stmmac-$(CONFIG_STMMAC_SELFTESTS) += stmmac_selftests.o
 # Ordering matters. Generic driver must be last.
 obj-$(CONFIG_STMMAC_PLATFORM)	+= stmmac-platform.o
 obj-$(CONFIG_DWMAC_ANARION)	+= dwmac-anarion.o
+obj-$(CONFIG_DWMAC_BAIKAL)	+= dwmac-baikal.o
 obj-$(CONFIG_DWMAC_IPQ806X)	+= dwmac-ipq806x.o
 obj-$(CONFIG_DWMAC_LPC18XX)	+= dwmac-lpc18xx.o
 obj-$(CONFIG_DWMAC_MEDIATEK)	+= dwmac-mediatek.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-baikal.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-baikal.c
new file mode 100644
index 0000000000000..d7bfe8df38ae4
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-baikal.c
@@ -0,0 +1,532 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Baikal Electronics DWMAC specific glue layer
+ *
+ * Copyright (C) 2015-2022 Baikal Electronics, JSC
+ * Authors: Dmitry Dunaev <dmitry.dunaev@baikalelectronics.ru>
+ *          Alexey Sheplyakov <asheplyakov@altlinux.org>
+ */
+
+#include <linux/acpi.h>
+#include <linux/arm-smccc.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/gpio/consumer.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include "stmmac.h"
+#include "stmmac_platform.h"
+#include "common.h"
+#include "dwmac_dma.h"
+#include "dwmac1000_dma.h"
+
+#define MAC_GPIO	0x00e0		/* GPIO register */
+#define MAC_GPIO_GPO	(1 << 8)	/* Output port */
+
+#define BAIKAL_SMC_GMAC_DIV2_ENABLE	0x82000500
+#define BAIKAL_SMC_GMAC_DIV2_DISABLE	0x82000501
+
+struct baikal_gmac {
+	struct device	*dev;
+	uint64_t	base;
+	struct clk	*axi_clk;
+	struct clk	*tx2_clk;
+	int		has_aux_div2;
+};
+
+static int baikal_gmac_dma_reset(void __iomem *ioaddr)
+{
+	int err;
+	u32 value;
+
+	/* DMA SW reset */
+	value = readl(ioaddr + DMA_BUS_MODE);
+	value |= DMA_BUS_MODE_SFT_RESET;
+	writel(value, ioaddr + DMA_BUS_MODE);
+
+	/* Software DMA reset also resets MAC, so GP_OUT is set to zero.
+	 * Which resets PHY as a side effect (if GP_OUT is connected directly
+	 * to PHY reset).
+	 * TODO: read the PHY reset duration from the device tree.
+	 * Meanwhile use 100 milliseconds which seems to be enough for
+	 * most PHYs
+	 */
+	usleep_range(100000, 120000);
+
+	/* Clear PHY reset */
+	value = readl(ioaddr + MAC_GPIO);
+	value |= MAC_GPIO_GPO;
+	writel(value, ioaddr + MAC_GPIO);
+
+	/* Many PHYs need ~100 milliseconds to calm down after PHY reset
+	 * has been cleared. And check for DMA reset below might return
+	 * much earlier (i.e. in ~20 milliseconds). As a result reading
+	 * PHY registers (after this function returns) might return garbage.
+	 * Wait a bit to avoid the problem.
+	 */
+	usleep_range(100000, 150000);
+
+	err = readl_poll_timeout(ioaddr + DMA_BUS_MODE, value,
+				 !(value & DMA_BUS_MODE_SFT_RESET),
+				 10000, 1000000);
+	if (err) {
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static const struct stmmac_dma_ops baikal_gmac_dma_ops = {
+	.reset = baikal_gmac_dma_reset,
+	.init = dwmac1000_dma_init,
+	.init_rx_chan = dwmac1000_dma_init_rx,
+	.init_tx_chan = dwmac1000_dma_init_tx,
+	.axi = dwmac1000_dma_axi,
+	.dump_regs = dwmac1000_dump_dma_regs,
+	.dma_rx_mode = dwmac1000_dma_operation_mode_rx,
+	.dma_tx_mode = dwmac1000_dma_operation_mode_tx,
+	.enable_dma_transmission = dwmac_enable_dma_transmission,
+	.enable_dma_irq = dwmac_enable_dma_irq,
+	.disable_dma_irq = dwmac_disable_dma_irq,
+	.start_tx = dwmac_dma_start_tx,
+	.stop_tx = dwmac_dma_stop_tx,
+	.start_rx = dwmac_dma_start_rx,
+	.stop_rx = dwmac_dma_stop_rx,
+	.dma_interrupt = dwmac_dma_interrupt,
+	.get_hw_feature = dwmac1000_get_hw_feature,
+	.rx_watchdog = dwmac1000_rx_watchdog
+};
+
+static struct mac_device_info *baikal_gmac_setup(void *ppriv)
+{
+	struct mac_device_info *mac, *old_mac;
+	struct stmmac_priv *priv = ppriv;
+	struct gpio_desc *reset_gpio;
+	int err;
+	u32 value;
+
+	mac = devm_kzalloc(priv->device, sizeof(*mac), GFP_KERNEL);
+	if (!mac) {
+		return NULL;
+	}
+
+	/* Clear PHY reset */
+	value = readl(priv->ioaddr + MAC_GPIO);
+	value |= MAC_GPIO_GPO;
+	writel(value, priv->ioaddr + MAC_GPIO);
+	reset_gpio = devm_gpiod_get_optional(priv->device,
+					     "snps,reset",
+					     GPIOD_OUT_LOW);
+
+	err = readl_poll_timeout(priv->ioaddr + DMA_BUS_MODE, value,
+				 !(value & DMA_BUS_MODE_SFT_RESET),
+				 10000, 1000000);
+
+	if (reset_gpio != NULL) {
+		devm_gpiod_put(priv->device, reset_gpio);
+	}
+
+	if (err) {
+		dev_err(priv->device, "SW reset is not cleared: error %d", err);
+		return NULL;
+	}
+
+	mac->dma = &baikal_gmac_dma_ops;
+	old_mac = priv->hw;
+	priv->hw = mac;
+	err = dwmac1000_setup(priv);
+	priv->hw = old_mac;
+	if (err) {
+		dev_err(priv->device,
+			"%s: dwmac1000_setup failed with error %d",
+			__func__, err);
+		return NULL;
+	}
+
+	return mac;
+}
+
+static void baikal_gmac_fix_mac_speed(void *priv, unsigned int speed)
+{
+	struct arm_smccc_res res;
+	struct baikal_gmac *gmac = priv;
+	unsigned long tx2_clk_freq = 0;
+
+	switch (speed) {
+	case SPEED_1000:
+		tx2_clk_freq = 250000000;
+		if (gmac->has_aux_div2) {
+			arm_smccc_smc(BAIKAL_SMC_GMAC_DIV2_DISABLE,
+				      gmac->base, 0, 0, 0, 0, 0, 0, &res);
+		}
+		break;
+	case SPEED_100:
+		tx2_clk_freq = 50000000;
+		if (gmac->has_aux_div2) {
+			arm_smccc_smc(BAIKAL_SMC_GMAC_DIV2_DISABLE,
+				      gmac->base, 0, 0, 0, 0, 0, 0, &res);
+		}
+		break;
+	case SPEED_10:
+		tx2_clk_freq = 5000000;
+		if (gmac->has_aux_div2) {
+			tx2_clk_freq *= 2;
+			arm_smccc_smc(BAIKAL_SMC_GMAC_DIV2_ENABLE,
+				      gmac->base, 0, 0, 0, 0, 0, 0, &res);
+		}
+		break;
+	}
+
+	if (gmac->tx2_clk != NULL && tx2_clk_freq) {
+		clk_set_rate(gmac->tx2_clk, tx2_clk_freq);
+	}
+}
+
+#ifdef CONFIG_ACPI
+static struct plat_stmmacenet_data *baikal_stmmac_probe_config(struct device *dev,
+								const char **mac)
+{
+	struct plat_stmmacenet_data *plat_dat;
+	u8 nvmem_mac[ETH_ALEN];
+	int ret;
+	bool is_fixed_clk = false;
+
+	plat_dat = devm_kzalloc(dev, sizeof(*plat_dat), GFP_KERNEL);
+	if (!plat_dat) {
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ret = nvmem_get_mac_address(dev, &nvmem_mac);
+	if (ret) {
+		if (ret == -EPROBE_DEFER) {
+			return ERR_PTR(ret);
+		}
+
+		*mac = NULL;
+	} else {
+		*mac = devm_kmemdup(dev, nvmem_mac, ETH_ALEN, GFP_KERNEL);
+	}
+
+	plat_dat->phy_interface = device_get_phy_mode(dev);
+	if (plat_dat->phy_interface < 0) {
+		return NULL;
+	}
+
+	plat_dat->interface = plat_dat->phy_interface;
+
+	if (device_property_read_u32(dev, "max-speed", &plat_dat->max_speed)) {
+		plat_dat->max_speed = -1;
+	}
+
+	plat_dat->bus_id = ACPI_COMPANION(dev)->pnp.instance_no;
+
+	ret = device_property_read_u32(dev, "reg", &plat_dat->phy_addr);
+	if (ret) {
+		dev_err(dev, "couldn't get reg property\n");
+		return ERR_PTR(ret);
+	}
+
+	if (plat_dat->phy_addr >= PHY_MAX_ADDR) {
+		dev_err(dev, "PHY address %i is too large\n",
+				plat_dat->phy_addr);
+		return ERR_PTR(-EINVAL);
+	}
+
+	plat_dat->mdio_bus_data = devm_kzalloc(dev,
+						sizeof(*plat_dat->mdio_bus_data),
+						GFP_KERNEL);
+	if (!plat_dat->mdio_bus_data) {
+		return ERR_PTR(-ENOMEM);
+	}
+
+	plat_dat->mdio_bus_data->needs_reset = true;
+	plat_dat->maxmtu = JUMBO_LEN;
+	plat_dat->multicast_filter_bins = HASH_TABLE_SIZE;
+	plat_dat->unicast_filter_entries = 1;
+	plat_dat->bugged_jumbo = 1; /* TODO: is it really required? */
+
+	plat_dat->dma_cfg = devm_kzalloc(dev,
+					sizeof(*plat_dat->dma_cfg),
+					GFP_KERNEL);
+	if (!plat_dat->dma_cfg) {
+		return ERR_PTR(-ENOMEM);
+	}
+
+	plat_dat->dma_cfg->pbl = DEFAULT_DMA_PBL;
+	device_property_read_u32(dev, "snps,txpbl", &plat_dat->dma_cfg->txpbl);
+	device_property_read_u32(dev, "snps,rxpbl", &plat_dat->dma_cfg->rxpbl);
+	plat_dat->dma_cfg->fixed_burst = device_property_read_bool(dev,
+							"snps,fixed-burst");
+
+	plat_dat->axi = devm_kzalloc(dev, sizeof(*plat_dat->axi), GFP_KERNEL);
+	if (!plat_dat->axi) {
+		return ERR_PTR(-ENOMEM);
+	}
+
+	device_property_read_u32_array(dev, "snps,blen",
+					plat_dat->axi->axi_blen, AXI_BLEN);
+
+	plat_dat->rx_queues_to_use = 1;
+	plat_dat->tx_queues_to_use = 1;
+	plat_dat->rx_queues_cfg[0].mode_to_use = MTL_QUEUE_DCB;
+	plat_dat->tx_queues_cfg[0].mode_to_use = MTL_QUEUE_DCB;
+
+	if (device_property_read_u32(dev, "stmmac-clk", &plat_dat->clk_ptp_rate)) {
+		plat_dat->clk_ptp_rate = 50000000;
+	}
+
+	plat_dat->stmmac_clk = devm_clk_get(dev, STMMAC_RESOURCE_NAME);
+	if (IS_ERR(plat_dat->stmmac_clk)) {
+		if (!plat_dat->clk_ptp_rate) {
+			dev_err(dev, "stmmaceth clock and 'stmmac-clk' property are missed simultaneously\n");
+			return ERR_PTR(-EINVAL);
+		}
+
+		plat_dat->stmmac_clk = clk_register_fixed_rate(NULL,
+							dev_name(dev), NULL, 0,
+							plat_dat->clk_ptp_rate);
+		if (IS_ERR(plat_dat->stmmac_clk))
+			return ERR_CAST(plat_dat->stmmac_clk);
+
+		is_fixed_clk = true;
+	} else {
+		if (!plat_dat->clk_ptp_rate)
+			plat_dat->clk_ptp_rate = clk_get_rate(plat_dat->stmmac_clk);
+	}
+
+	plat_dat->clk_ptp_ref = devm_clk_get(dev, "ptp_ref");
+	if (IS_ERR(plat_dat->clk_ptp_ref)) {
+		plat_dat->clk_ptp_ref = NULL;
+	} else {
+		plat_dat->clk_ptp_rate = clk_get_rate(plat_dat->clk_ptp_ref);
+	}
+
+	clk_prepare_enable(plat_dat->stmmac_clk);
+
+	plat_dat->stmmac_rst = devm_reset_control_get(dev,
+						      STMMAC_RESOURCE_NAME);
+	if (IS_ERR(plat_dat->stmmac_rst))
+		plat_dat->stmmac_rst = NULL;
+
+	plat_dat->mdio_bus_data->phy_mask = ~0;
+
+	if (device_get_child_node_count(dev) != 1) {
+		clk_disable_unprepare(plat_dat->stmmac_clk);
+		if (is_fixed_clk)
+			clk_unregister_fixed_rate(plat_dat->stmmac_clk);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return plat_dat;
+}
+
+static int baikal_add_mdio_phy(struct device *dev)
+{
+	struct stmmac_priv *priv = netdev_priv(dev_get_drvdata(dev));
+	struct fwnode_handle *fwnode = device_get_next_child_node(dev, NULL);
+	struct phy_device *phy;
+	int ret;
+
+	phy = get_phy_device(priv->mii, priv->plat->phy_addr, 0);
+	if (IS_ERR(phy)) {
+		return PTR_ERR(phy);
+	}
+
+	phy->irq = priv->mii->irq[priv->plat->phy_addr];
+	phy->mdio.dev.fwnode = fwnode;
+
+	ret = phy_device_register(phy);
+	if (ret) {
+		phy_device_free(phy);
+		return ret;
+	}
+
+	return 0;
+}
+#else
+static struct plat_stmmacenet_data *baikal_stmmac_probe_config(struct device *dev,
+							       const char **mac)
+{
+	return NULL;
+}
+
+static int baikal_add_mdio_phy(struct device *dev)
+{
+	return 0;
+}
+#endif
+
+static int baikal_gmac_probe(struct platform_device *pdev)
+{
+	struct plat_stmmacenet_data *plat_dat;
+	struct stmmac_resources stmmac_res;
+	struct resource *res;
+	struct baikal_gmac *gmac;
+	struct device_node *dn = NULL;
+	const char *str = NULL;
+	int ret;
+
+	if (acpi_disabled) {
+		ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+		if (ret) {
+			return ret;
+		}
+	} else {
+		memset(&stmmac_res, 0, sizeof(stmmac_res));
+		stmmac_res.irq = platform_get_irq(pdev, 0);
+		if (stmmac_res.irq < 0) {
+			return stmmac_res.irq;
+		}
+
+		stmmac_res.wol_irq = stmmac_res.irq;
+		stmmac_res.addr = devm_platform_ioremap_resource(pdev, 0);
+		if (IS_ERR(stmmac_res.addr)) {
+			return PTR_ERR(stmmac_res.addr);
+		}
+	}
+
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+	if (ret) {
+		dev_warn(&pdev->dev, "no suitable DMA available\n");
+		return ret;
+	}
+
+	if (pdev->dev.of_node) {
+		plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+		if (IS_ERR(plat_dat)) {
+			dev_err(&pdev->dev, "dt configuration failed\n");
+			return PTR_ERR(plat_dat);
+		}
+	} else if (!acpi_disabled) {
+		plat_dat = baikal_stmmac_probe_config(&pdev->dev,
+							&stmmac_res.mac);
+		if (IS_ERR(plat_dat)) {
+			dev_err(&pdev->dev, "acpi configuration failed\n");
+			return PTR_ERR(plat_dat);
+		}
+
+		dn = kzalloc(sizeof(struct device_node), GFP_KERNEL);
+		if (!dn) {
+			ret = -ENOMEM;
+			goto err_remove_config_dt;
+		}
+
+		plat_dat->phy_node = dn;
+	} else {
+		plat_dat = dev_get_platdata(&pdev->dev);
+		if (!plat_dat) {
+			dev_err(&pdev->dev, "no platform data provided\n");
+			return -EINVAL;
+		}
+
+		/* Set default value for multicast hash bins */
+		plat_dat->multicast_filter_bins = HASH_TABLE_SIZE;
+
+		/* Set default value for unicast filter entries */
+		plat_dat->unicast_filter_entries = 1;
+	}
+
+	gmac = devm_kzalloc(&pdev->dev, sizeof(*gmac), GFP_KERNEL);
+	if (!gmac) {
+		ret = -ENOMEM;
+		goto err_remove_config_dt;
+	}
+
+	gmac->dev = &pdev->dev;
+	gmac->tx2_clk = devm_clk_get(gmac->dev, "tx2_clk");
+	if (IS_ERR(gmac->tx2_clk)) {
+		dev_warn(&pdev->dev, "couldn't get TX2 clock\n");
+		gmac->tx2_clk = NULL;
+	}
+
+	gmac->axi_clk = devm_clk_get(gmac->dev, "axi_clk");
+	if (IS_ERR(gmac->axi_clk)) {
+		dev_warn(&pdev->dev, "couldn't get AXI clock\n");
+		gmac->axi_clk = NULL;
+	} else {
+		clk_set_rate(gmac->axi_clk, 300000000);
+	}
+
+	if (!acpi_disabled) {
+		device_property_read_string(&pdev->dev, "compatible", &str);
+	}
+
+	if ((gmac->dev->of_node &&
+	     of_device_is_compatible(gmac->dev->of_node, "baikal,bs1000-gmac"))
+	    || (str && strcasecmp(str, "baikal,bs1000-gmac") == 0)) {
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		gmac->base = res->start;
+		gmac->has_aux_div2 = 1;
+	} else {
+		gmac->has_aux_div2 = 0;
+	}
+
+	plat_dat->fix_mac_speed = baikal_gmac_fix_mac_speed;
+	plat_dat->bsp_priv = gmac;
+	plat_dat->has_gmac = 1;
+	plat_dat->bugged_jumbo = 1; /* TODO: is it really required? */
+	plat_dat->setup = baikal_gmac_setup;
+
+	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	if (ret) {
+		goto err_remove_config_dt;
+	}
+
+	if (!acpi_disabled) {
+		ret = baikal_add_mdio_phy(&pdev->dev);
+		if (ret) {
+			goto err_remove_config_dt;
+		}
+	}
+
+	kfree(dn);
+	return 0;
+
+err_remove_config_dt:
+	stmmac_remove_config_dt(pdev, plat_dat);
+
+	if (!acpi_disabled) {
+		clk_disable_unprepare(plat_dat->stmmac_clk);
+		clk_unregister_fixed_rate(plat_dat->stmmac_clk);
+	}
+
+	kfree(dn);
+	return ret;
+}
+
+static int baikal_gmac_remove(struct platform_device *pdev)
+{
+	struct stmmac_priv *priv = netdev_priv(dev_get_drvdata(&pdev->dev));
+
+	if (!acpi_disabled) {
+		clk_disable_unprepare(priv->plat->stmmac_clk);
+		clk_unregister_fixed_rate(priv->plat->stmmac_clk);
+		priv->plat->stmmac_clk = NULL;
+	}
+
+	return stmmac_pltfr_remove(pdev);
+}
+
+static const struct of_device_id baikal_gmac_dwmac_match[] = {
+	{ .compatible = "baikal,bm1000-gmac" },
+	{ .compatible = "baikal,bs1000-gmac" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, baikal_gmac_dwmac_match);
+
+static struct platform_driver baikal_gmac_dwmac_driver = {
+	.probe	= baikal_gmac_probe,
+	.remove	= baikal_gmac_remove,
+	.driver	= {
+		.name = "baikal-gmac-dwmac",
+		.pm = &stmmac_pltfr_pm_ops,
+		.of_match_table = of_match_ptr(baikal_gmac_dwmac_match)
+	}
+};
+module_platform_driver(baikal_gmac_dwmac_driver);
+
+MODULE_DESCRIPTION("Baikal DWMAC specific glue driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index 1d0b64bd1e1a9..65e8cd640f740 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -565,3 +565,4 @@ int dwmac1000_setup(struct stmmac_priv *priv)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(dwmac1000_setup);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
index fbf2deafe8ba3..0a32081c4fb07 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
@@ -16,7 +16,7 @@
 #include "dwmac1000.h"
 #include "dwmac_dma.h"
 
-static void dwmac1000_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
+void dwmac1000_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
 {
 	u32 value = readl(ioaddr + DMA_AXI_BUS_MODE);
 	int i;
@@ -69,9 +69,10 @@ static void dwmac1000_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
 
 	writel(value, ioaddr + DMA_AXI_BUS_MODE);
 }
+EXPORT_SYMBOL_GPL(dwmac1000_dma_axi);
 
-static void dwmac1000_dma_init(void __iomem *ioaddr,
-			       struct stmmac_dma_cfg *dma_cfg, int atds)
+void dwmac1000_dma_init(void __iomem *ioaddr,
+		        struct stmmac_dma_cfg *dma_cfg, int atds)
 {
 	u32 value = readl(ioaddr + DMA_BUS_MODE);
 	int txpbl = dma_cfg->txpbl ?: dma_cfg->pbl;
@@ -109,22 +110,25 @@ static void dwmac1000_dma_init(void __iomem *ioaddr,
 	/* Mask interrupts by writing to CSR7 */
 	writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA);
 }
+EXPORT_SYMBOL_GPL(dwmac1000_dma_init);
 
-static void dwmac1000_dma_init_rx(void __iomem *ioaddr,
-				  struct stmmac_dma_cfg *dma_cfg,
-				  dma_addr_t dma_rx_phy, u32 chan)
+void dwmac1000_dma_init_rx(void __iomem *ioaddr,
+			   struct stmmac_dma_cfg *dma_cfg,
+			   dma_addr_t dma_rx_phy, u32 chan)
 {
 	/* RX descriptor base address list must be written into DMA CSR3 */
 	writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_RCV_BASE_ADDR);
 }
+EXPORT_SYMBOL_GPL(dwmac1000_dma_init_rx);
 
-static void dwmac1000_dma_init_tx(void __iomem *ioaddr,
-				  struct stmmac_dma_cfg *dma_cfg,
-				  dma_addr_t dma_tx_phy, u32 chan)
+void dwmac1000_dma_init_tx(void __iomem *ioaddr,
+			   struct stmmac_dma_cfg *dma_cfg,
+			   dma_addr_t dma_tx_phy, u32 chan)
 {
 	/* TX descriptor base address list must be written into DMA CSR4 */
 	writel(lower_32_bits(dma_tx_phy), ioaddr + DMA_TX_BASE_ADDR);
 }
+EXPORT_SYMBOL_GPL(dwmac1000_dma_init_tx);
 
 static u32 dwmac1000_configure_fc(u32 csr6, int rxfifosz)
 {
@@ -147,8 +151,8 @@ static u32 dwmac1000_configure_fc(u32 csr6, int rxfifosz)
 	return csr6;
 }
 
-static void dwmac1000_dma_operation_mode_rx(void __iomem *ioaddr, int mode,
-					    u32 channel, int fifosz, u8 qmode)
+void dwmac1000_dma_operation_mode_rx(void __iomem *ioaddr, int mode,
+				     u32 channel, int fifosz, u8 qmode)
 {
 	u32 csr6 = readl(ioaddr + DMA_CONTROL);
 
@@ -174,9 +178,10 @@ static void dwmac1000_dma_operation_mode_rx(void __iomem *ioaddr, int mode,
 
 	writel(csr6, ioaddr + DMA_CONTROL);
 }
+EXPORT_SYMBOL_GPL(dwmac1000_dma_operation_mode_rx);
 
-static void dwmac1000_dma_operation_mode_tx(void __iomem *ioaddr, int mode,
-					    u32 channel, int fifosz, u8 qmode)
+void dwmac1000_dma_operation_mode_tx(void __iomem *ioaddr, int mode,
+				     u32 channel, int fifosz, u8 qmode)
 {
 	u32 csr6 = readl(ioaddr + DMA_CONTROL);
 
@@ -207,8 +212,9 @@ static void dwmac1000_dma_operation_mode_tx(void __iomem *ioaddr, int mode,
 
 	writel(csr6, ioaddr + DMA_CONTROL);
 }
+EXPORT_SYMBOL_GPL(dwmac1000_dma_operation_mode_tx);
 
-static void dwmac1000_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space)
+void dwmac1000_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space)
 {
 	int i;
 
@@ -217,8 +223,9 @@ static void dwmac1000_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space)
 			reg_space[DMA_BUS_MODE / 4 + i] =
 				readl(ioaddr + DMA_BUS_MODE + i * 4);
 }
+EXPORT_SYMBOL_GPL(dwmac1000_dump_dma_regs);
 
-static int dwmac1000_get_hw_feature(void __iomem *ioaddr,
+int dwmac1000_get_hw_feature(void __iomem *ioaddr,
 				    struct dma_features *dma_cap)
 {
 	u32 hw_cap = readl(ioaddr + DMA_HW_FEATURE);
@@ -262,12 +269,14 @@ static int dwmac1000_get_hw_feature(void __iomem *ioaddr,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(dwmac1000_get_hw_feature);
 
-static void dwmac1000_rx_watchdog(void __iomem *ioaddr, u32 riwt,
-				  u32 number_chan)
+void dwmac1000_rx_watchdog(void __iomem *ioaddr, u32 riwt,
+			   u32 number_chan)
 {
 	writel(riwt, ioaddr + DMA_RX_WATCHDOG);
 }
+EXPORT_SYMBOL_GPL(dwmac1000_rx_watchdog);
 
 const struct stmmac_dma_ops dwmac1000_dma_ops = {
 	.reset = dwmac_dma_reset,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.h
new file mode 100644
index 0000000000000..df82d4ae8a80f
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __DWMAC1000_DMA_H__
+#define __DWMAC1000_DMA_H__
+#include "dwmac1000.h"
+
+void dwmac1000_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi);
+void dwmac1000_dma_init(void __iomem *ioaddr,
+		        struct stmmac_dma_cfg *dma_cfg, int atds);
+void dwmac1000_dma_init_rx(void __iomem *ioaddr,
+			   struct stmmac_dma_cfg *dma_cfg,
+			   dma_addr_t dma_rx_phy, u32 chan);
+void dwmac1000_dma_init_tx(void __iomem *ioaddr,
+			   struct stmmac_dma_cfg *dma_cfg,
+			   dma_addr_t dma_tx_phy, u32 chan);
+void dwmac1000_dma_operation_mode_rx(void __iomem *ioaddr, int mode,
+				     u32 channel, int fifosz, u8 qmode);
+void dwmac1000_dma_operation_mode_tx(void __iomem *ioaddr, int mode,
+				     u32 channel, int fifosz, u8 qmode);
+void dwmac1000_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space);
+
+int dwmac1000_get_hw_feature(void __iomem *ioaddr,
+			      struct dma_features *dma_cap);
+
+void dwmac1000_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 number_chan);
+#endif /* __DWMAC1000_DMA_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
index 1bc25aa86dbd2..bb1af371b5bb9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -36,16 +36,19 @@ void dwmac_enable_dma_transmission(void __iomem *ioaddr)
 {
 	writel(1, ioaddr + DMA_XMT_POLL_DEMAND);
 }
+EXPORT_SYMBOL_GPL(dwmac_enable_dma_transmission);
 
 void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan)
 {
 	writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA);
 }
+EXPORT_SYMBOL_GPL(dwmac_enable_dma_irq);
 
 void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan)
 {
 	writel(0, ioaddr + DMA_INTR_ENA);
 }
+EXPORT_SYMBOL_GPL(dwmac_disable_dma_irq);
 
 void dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan)
 {
@@ -53,6 +56,7 @@ void dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan)
 	value |= DMA_CONTROL_ST;
 	writel(value, ioaddr + DMA_CONTROL);
 }
+EXPORT_SYMBOL_GPL(dwmac_dma_start_tx);
 
 void dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan)
 {
@@ -60,6 +64,7 @@ void dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan)
 	value &= ~DMA_CONTROL_ST;
 	writel(value, ioaddr + DMA_CONTROL);
 }
+EXPORT_SYMBOL_GPL(dwmac_dma_stop_tx);
 
 void dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan)
 {
@@ -67,6 +72,7 @@ void dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan)
 	value |= DMA_CONTROL_SR;
 	writel(value, ioaddr + DMA_CONTROL);
 }
+EXPORT_SYMBOL_GPL(dwmac_dma_start_rx);
 
 void dwmac_dma_stop_rx(void __iomem *ioaddr, u32 chan)
 {
@@ -74,6 +80,7 @@ void dwmac_dma_stop_rx(void __iomem *ioaddr, u32 chan)
 	value &= ~DMA_CONTROL_SR;
 	writel(value, ioaddr + DMA_CONTROL);
 }
+EXPORT_SYMBOL_GPL(dwmac_dma_stop_rx);
 
 #ifdef DWMAC_DMA_DEBUG
 static void show_tx_process_state(unsigned int status)
@@ -215,6 +222,7 @@ int dwmac_dma_interrupt(void __iomem *ioaddr,
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(dwmac_dma_interrupt);
 
 void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr)
 {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index d993fc7e82c3a..f5dbfc405a758 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -12,6 +12,8 @@
 #define STMMAC_RESOURCE_NAME   "stmmaceth"
 #define DRV_MODULE_VERSION	"Jan_2016"
 
+#define STMMAC_TX_NEED_RESCHED	(-1)
+
 #include <linux/clk.h>
 #include <linux/if_vlan.h>
 #include <linux/stmmac.h>
@@ -37,6 +39,9 @@ struct stmmac_tx_info {
 	unsigned len;
 	bool last_segment;
 	bool is_jumbo;
+	u8 tx_source_type;
+	void *page_addr;
+	struct page *page;
 };
 
 /* Frequently used values are kept adjacent for cache effect */
@@ -56,6 +61,15 @@ struct stmmac_tx_queue {
 	u32 mss;
 };
 
+/* For XSK socket */
+struct stmmac_xsk_desc_map {
+	/* For Rx descriptors only */
+	dma_addr_t dma_addr;
+	void *cpu_addr;
+	u64 handle;
+	u32 page_offset;
+};
+
 struct stmmac_rx_buffer {
 	struct page *page;
 	struct page *sec_page;
@@ -82,6 +96,15 @@ struct stmmac_rx_queue {
 		unsigned int len;
 		unsigned int error;
 	} state;
+
+	/* AF_XDP support */
+	struct xdp_rxq_info xdp_rxq;
+	struct zero_copy_allocator zca;
+	/* Buffer info (extra data) is used for XSK */
+	struct stmmac_xsk_desc_map desc_map[DMA_RX_SIZE];
+	struct timer_list rx_init_timer;
+	struct timer_list rx_refill_timer;
+	bool rx_empty;
 };
 
 struct stmmac_channel {
@@ -220,6 +243,7 @@ struct stmmac_priv {
 
 	unsigned long state;
 	struct workqueue_struct *wq;
+	struct workqueue_struct *refill_wq;
 	struct work_struct service_task;
 
 	/* TC Handling */
@@ -234,6 +258,19 @@ struct stmmac_priv {
 
 	/* Receive Side Scaling */
 	struct stmmac_rss rss;
+
+	/* AF_XDP support */
+	struct bpf_prog *xdp_prog;
+	struct xdp_umem **xsk_umems;
+	struct zero_copy_allocator zca;
+	u16 num_xsk_umems_used;
+	u16 num_xsk_umems;
+
+	/* AF_XDP add to start_xmit() asynchronous way of transferring data so add
+	 * locking mechanism to stop competition. Rx lock mechanism is build in
+	 * Rx cleaning function.
+	 */
+	atomic_t tx_lock;
 };
 
 enum stmmac_state {
@@ -243,6 +280,15 @@ enum stmmac_state {
 	STMMAC_SERVICE_SCHED,
 };
 
+int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue);
+int stmmac_hw_restrict_setup(struct net_device *dev, bool init_ptp);
+int init_dma_desc_rings(struct net_device *dev, gfp_t flags);
+void init_dma_rx_desc_rings_xsk(struct net_device *dev);
+void stmmac_stop_all_dma(struct stmmac_priv *priv);
+void stmmac_start_all_dma(struct stmmac_priv *priv);
+void free_dma_rx_desc_resources(struct stmmac_priv *priv);
+int alloc_dma_rx_desc_resources(struct stmmac_priv *priv);
+
 int stmmac_mdio_unregister(struct net_device *ndev);
 int stmmac_mdio_register(struct net_device *ndev);
 int stmmac_mdio_reset(struct mii_bus *mii);
@@ -258,6 +304,7 @@ int stmmac_dvr_probe(struct device *device,
 		     struct stmmac_resources *res);
 void stmmac_disable_eee_mode(struct stmmac_priv *priv);
 bool stmmac_eee_init(struct stmmac_priv *priv);
+u32 stmmac_rx_dirty(struct stmmac_priv *priv, u32 queue);
 
 #if IS_ENABLED(CONFIG_STMMAC_SELFTESTS)
 void stmmac_selftest_run(struct net_device *dev,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 6a3b0f76d9729..37f26446ff6ee 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -44,6 +44,7 @@
 #include "dwmac1000.h"
 #include "dwxgmac2.h"
 #include "hwif.h"
+#include "stmmac_xsk.h"
 
 #define	STMMAC_ALIGN(x)		ALIGN(ALIGN(x, SMP_CACHE_BYTES), 16)
 #define	TSO_MAX_BUFF_SIZE	(SZ_16K - 1)
@@ -262,7 +263,7 @@ static void print_pkt(unsigned char *buf, int len)
 	print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, buf, len);
 }
 
-static inline u32 stmmac_tx_avail(struct stmmac_priv *priv, u32 queue)
+inline u32 stmmac_tx_avail(struct stmmac_priv *priv, u32 queue)
 {
 	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
 	u32 avail;
@@ -280,7 +281,7 @@ static inline u32 stmmac_tx_avail(struct stmmac_priv *priv, u32 queue)
  * @priv: driver private structure
  * @queue: RX queue index
  */
-static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv, u32 queue)
+u32 stmmac_rx_dirty(struct stmmac_priv *priv, u32 queue)
 {
 	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
 	u32 dirty;
@@ -1263,6 +1264,47 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i)
 	}
 }
 
+/**
+ * init_dma_rx_desc_rings_xsk() - initialize allocated XSK descriptor rings
+ * @dev: associated net device
+ */
+void init_dma_rx_desc_rings_xsk(struct net_device *dev)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	u32 rx_count = priv->plat->rx_queues_to_use;
+	int bfsize = 0;
+	int queue;
+
+	bfsize = stmmac_set_16kib_bfsize(priv, dev->mtu);
+	if (bfsize < 0)
+		bfsize = 0;
+
+	if (bfsize < BUF_SIZE_16KiB)
+		bfsize = stmmac_set_bfsize(dev->mtu, priv->dma_buf_sz);
+
+	priv->dma_buf_sz = bfsize;
+
+	for (queue = 0; queue < rx_count; queue++) {
+		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+		netif_dbg(priv, probe, priv->dev,
+			  "(%s) dma_rx_phy=0x%08x\n", __func__,
+			  (u32)rx_q->dma_rx_phy);
+
+		stmmac_clear_rx_descriptors(priv, queue);
+
+		/* Setup the chained descriptor addresses */
+		if (priv->mode == STMMAC_CHAIN_MODE) {
+			if (priv->extend_desc)
+				stmmac_mode_init(priv, rx_q->dma_erx,
+						 rx_q->dma_rx_phy, DMA_RX_SIZE, 1);
+			else
+				stmmac_mode_init(priv, rx_q->dma_rx,
+						 rx_q->dma_rx_phy, DMA_RX_SIZE, 0);
+		}
+	}
+}
+
 /**
  * init_dma_rx_desc_rings - init the RX descriptor rings
  * @dev: net device structure
@@ -1271,7 +1313,7 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i)
  * and allocates the socket buffers. It supports the chained and ring
  * modes.
  */
-static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
+int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
 	u32 rx_count = priv->plat->rx_queues_to_use;
@@ -1402,7 +1444,7 @@ static int init_dma_tx_desc_rings(struct net_device *dev)
  * and allocates the socket buffers. It supports the chained and ring
  * modes.
  */
-static int init_dma_desc_rings(struct net_device *dev, gfp_t flags)
+int init_dma_desc_rings(struct net_device *dev, gfp_t flags)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
 	int ret;
@@ -1464,7 +1506,7 @@ static void stmmac_free_tx_skbufs(struct stmmac_priv *priv)
  * free_dma_rx_desc_resources - free RX dma desc resources
  * @priv: private structure
  */
-static void free_dma_rx_desc_resources(struct stmmac_priv *priv)
+void free_dma_rx_desc_resources(struct stmmac_priv *priv)
 {
 	u32 rx_count = priv->plat->rx_queues_to_use;
 	u32 queue;
@@ -1531,7 +1573,7 @@ static void free_dma_tx_desc_resources(struct stmmac_priv *priv)
  * reception, for example, it pre-allocated the RX socket buffer in order to
  * allow zero-copy mechanism.
  */
-static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv)
+int alloc_dma_rx_desc_resources(struct stmmac_priv *priv)
 {
 	u32 rx_count = priv->plat->rx_queues_to_use;
 	int ret = -ENOMEM;
@@ -1759,7 +1801,7 @@ static void stmmac_stop_tx_dma(struct stmmac_priv *priv, u32 chan)
  * Description:
  * This starts all the RX and TX DMA channels
  */
-static void stmmac_start_all_dma(struct stmmac_priv *priv)
+void stmmac_start_all_dma(struct stmmac_priv *priv)
 {
 	u32 rx_channels_count = priv->plat->rx_queues_to_use;
 	u32 tx_channels_count = priv->plat->tx_queues_to_use;
@@ -1778,7 +1820,7 @@ static void stmmac_start_all_dma(struct stmmac_priv *priv)
  * Description:
  * This stops the RX and TX DMA channels
  */
-static void stmmac_stop_all_dma(struct stmmac_priv *priv)
+void stmmac_stop_all_dma(struct stmmac_priv *priv)
 {
 	u32 rx_channels_count = priv->plat->rx_queues_to_use;
 	u32 tx_channels_count = priv->plat->tx_queues_to_use;
@@ -1860,17 +1902,18 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
  * @queue: TX queue index
  * Description: it reclaims the transmit resources after transmission completes.
  */
-static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
+int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 {
 	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
 	unsigned int bytes_compl = 0, pkts_compl = 0;
 	unsigned int entry, count = 0;
+	struct stmmac_tx_info *tx_info;
+	unsigned int xsk_return = 0;
 
 	__netif_tx_lock_bh(netdev_get_tx_queue(priv->dev, queue));
-
 	priv->xstats.tx_clean++;
-
 	entry = tx_q->dirty_tx;
+
 	while ((entry != tx_q->cur_tx) && (count < budget)) {
 		struct sk_buff *skb = tx_q->tx_skbuff[entry];
 		struct dma_desc *p;
@@ -1881,14 +1924,54 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 		else
 			p = tx_q->dma_tx + entry;
 
-		status = stmmac_tx_status(priv, &priv->dev->stats,
-				&priv->xstats, p, priv->ioaddr);
-		/* Check if the descriptor is owned by the DMA */
+		status = stmmac_tx_status(priv, &priv->dev->stats, &priv->xstats, p,
+								  priv->ioaddr);
+
+		/* Check if descriptor is ready to use */
 		if (unlikely(status & tx_dma_own))
 			break;
 
 		count++;
 
+		/* Different types of descriptors use different ways to clean */
+		tx_info = tx_q->tx_skbuff_dma + entry;
+		if (tx_info->tx_source_type == STMMAC_TX_SOURCE_UMEM) {
+			/* UMEM */
+			tx_info->tx_source_type = STMMAC_TX_SOURCE_RESET;
+
+			stmmac_clean_desc3(priv, tx_q, p);
+			stmmac_release_tx_desc(priv, p, priv->mode);
+			tx_q->tx_skbuff_dma[entry].buf = 0;
+			tx_q->tx_skbuff_dma[entry].len = 0;
+			tx_q->tx_skbuff_dma[entry].map_as_page = false;
+			tx_q->tx_skbuff_dma[entry].last_segment = false;
+			tx_q->tx_skbuff_dma[entry].is_jumbo = false;
+
+			entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
+			xsk_return++;
+			continue;
+		} else if (tx_info->tx_source_type == STMMAC_TX_SOURCE_FRAME) {
+			/* UMEM frame */
+			tx_info->tx_source_type = STMMAC_TX_SOURCE_RESET;
+
+			dma_unmap_single(priv->device,
+					 tx_q->tx_skbuff_dma[entry].buf,
+					 tx_q->tx_skbuff_dma[entry].len,
+					 DMA_TO_DEVICE);
+
+			stmmac_clean_desc3(priv, tx_q, p);
+			stmmac_release_tx_desc(priv, p, priv->mode);
+			tx_q->tx_skbuff_dma[entry].buf = 0;
+			tx_q->tx_skbuff_dma[entry].len = 0;
+			tx_q->tx_skbuff_dma[entry].map_as_page = false;
+			tx_q->tx_skbuff_dma[entry].last_segment = false;
+			tx_q->tx_skbuff_dma[entry].is_jumbo = false;
+
+			entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
+			continue;
+		}
+		/* if STMMAC_TX_SOURCE_SKB */
+
 		/* Make sure descriptor fields are read after reading
 		 * the own bit.
 		 */
@@ -1912,11 +1995,12 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 					       tx_q->tx_skbuff_dma[entry].buf,
 					       tx_q->tx_skbuff_dma[entry].len,
 					       DMA_TO_DEVICE);
-			else
+			else {
 				dma_unmap_single(priv->device,
 						 tx_q->tx_skbuff_dma[entry].buf,
 						 tx_q->tx_skbuff_dma[entry].len,
 						 DMA_TO_DEVICE);
+			}
 			tx_q->tx_skbuff_dma[entry].buf = 0;
 			tx_q->tx_skbuff_dma[entry].len = 0;
 			tx_q->tx_skbuff_dma[entry].map_as_page = false;
@@ -1940,6 +2024,13 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 	}
 	tx_q->dirty_tx = entry;
 
+	if (xsk_return != 0) {
+		xsk_umem_complete_tx(priv->xsk_umems[queue], xsk_return);
+
+		if (xsk_umem_uses_need_wakeup(priv->xsk_umems[queue]))
+			xsk_set_tx_need_wakeup(priv->xsk_umems[queue]);
+	}
+
 	netdev_tx_completed_queue(netdev_get_tx_queue(priv->dev, queue),
 				  pkts_compl, bytes_compl);
 
@@ -1947,8 +2038,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 								queue))) &&
 	    stmmac_tx_avail(priv, queue) > STMMAC_TX_THRESH) {
 
-		netif_dbg(priv, tx_done, priv->dev,
-			  "%s: restart transmit\n", __func__);
+		netif_dbg(priv, tx_done, priv->dev, "%s: restart transmit\n", __func__);
 		netif_tx_wake_queue(netdev_get_tx_queue(priv->dev, queue));
 	}
 
@@ -2506,6 +2596,111 @@ static void stmmac_safety_feat_configuration(struct stmmac_priv *priv)
 	}
 }
 
+/**
+ * stmmac_hw_restrict_setup() - setup GEMAC HW without configure and start DMA.
+ * @dev: network device associated with HW
+ * @init_ptp: flag to do initialization of ptp
+ *
+ * This restricted version of stmmac_hw_setup() is used to support AF_XDP.
+ * The main reason is not allow to start DMA transaction. Also it doesn't HW
+ * reset whole GEMAC controller just set registers to initial values. It helps
+ * to do restart quicker than HW restart all GEMAC.
+ */
+int stmmac_hw_restrict_setup(struct net_device *dev, bool init_ptp)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	u32 rx_cnt = priv->plat->rx_queues_to_use;
+	u32 tx_cnt = priv->plat->tx_queues_to_use;
+	u32 chan;
+	int ret;
+
+	/* DMA initialization and SW reset */
+	ret = stmmac_init_dma_engine_xsk(priv);
+	if (ret < 0) {
+		netdev_err(priv->dev, "%s: DMA engine initialization failed\n",
+			   __func__);
+		return ret;
+	}
+
+	/* Copy the MAC addr into the HW  */
+	stmmac_set_umac_addr(priv, priv->hw, dev->dev_addr, 0);
+
+	/* PS and related bits will be programmed according to the speed */
+	if (priv->hw->pcs) {
+		int speed = priv->plat->mac_port_sel_speed;
+
+		if ((speed == SPEED_10) || (speed == SPEED_100) ||
+		    (speed == SPEED_1000)) {
+			priv->hw->ps = speed;
+		} else {
+			dev_warn(priv->device, "invalid port speed\n");
+			priv->hw->ps = 0;
+		}
+	}
+
+	/* Initialize MTL*/
+	stmmac_mtl_configuration(priv);
+
+	/* Initialize Safety Features */
+	stmmac_safety_feat_configuration(priv);
+
+	ret = stmmac_rx_ipc(priv, priv->hw);
+	if (!ret) {
+		netdev_warn(priv->dev, "RX IPC Checksum Offload disabled\n");
+		priv->plat->rx_coe = STMMAC_RX_COE_NONE;
+		priv->hw->rx_csum = 0;
+	}
+
+	/* Set the HW DMA mode and the COE */
+	stmmac_dma_operation_mode(priv);
+
+	stmmac_mmc_setup(priv);
+
+	if (init_ptp) {
+		ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
+		if (ret < 0)
+			netdev_warn(priv->dev, "failed to enable PTP reference clock: %d\n", ret);
+
+		ret = stmmac_init_ptp(priv);
+		if (ret == -EOPNOTSUPP)
+			netdev_warn(priv->dev, "PTP not supported by HW\n");
+		else if (ret)
+			netdev_warn(priv->dev, "PTP init failed\n");
+	}
+
+	priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS;
+
+	if (priv->use_riwt) {
+		ret = stmmac_rx_watchdog(priv, priv->ioaddr, MIN_DMA_RIWT, rx_cnt);
+		if (!ret)
+			priv->rx_riwt = MIN_DMA_RIWT;
+	}
+
+	if (priv->hw->pcs)
+		stmmac_pcs_ctrl_ane(priv, priv->ioaddr, 1, priv->hw->ps, 0);
+
+	/* set TX and RX rings length */
+	stmmac_set_rings_length(priv);
+
+	/* Enable TSO */
+	if (priv->tso) {
+		for (chan = 0; chan < tx_cnt; chan++)
+			stmmac_enable_tso(priv, priv->ioaddr, 1, chan);
+	}
+
+	/* Enable Split Header */
+	if (priv->sph && priv->hw->rx_csum) {
+		for (chan = 0; chan < rx_cnt; chan++)
+			stmmac_enable_sph(priv, priv->ioaddr, 1, chan);
+	}
+
+	/* VLAN Tag Insertion */
+	if (priv->dma_cap.vlins)
+		stmmac_enable_vlan(priv, priv->hw, STMMAC_VLAN_INSERT);
+
+	return 0;
+}
+
 /**
  * stmmac_hw_setup - setup mac in a usable state.
  *  @dev : pointer to the device structure.
@@ -2734,6 +2929,8 @@ static int stmmac_open(struct net_device *dev)
 	stmmac_enable_all_queues(priv);
 	netif_tx_start_all_queues(priv->dev);
 
+	atomic_set(&priv->tx_lock, 0);
+
 	return 0;
 
 lpiirq_error:
@@ -3133,27 +3330,39 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	dma_addr_t des;
 	bool has_vlan;
 	int entry;
+	u32 tx_avail = stmmac_tx_avail(priv, queue);
 
 	tx_q = &priv->tx_queue[queue];
 
+	if (atomic_read(&priv->tx_lock))
+		return NETDEV_TX_BUSY;
+	else
+		atomic_set(&priv->tx_lock, 1);
+
 	if (priv->tx_path_in_lpi_mode)
 		stmmac_disable_eee_mode(priv);
 
 	/* Manage oversized TCP frames for GMAC4 device */
 	if (skb_is_gso(skb) && priv->tso) {
-		if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))
-			return stmmac_tso_xmit(skb, dev);
+		if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) {
+			netdev_tx_t result;
+
+			result = stmmac_tso_xmit(skb, dev);
+			atomic_set(&priv->tx_lock, 0);
+
+			return result;
+		}
 	}
 
-	if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) {
+	if (unlikely(tx_avail < nfrags + 1)) {
 		if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, queue))) {
-			netif_tx_stop_queue(netdev_get_tx_queue(priv->dev,
-								queue));
+			netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
 			/* This is a hard error, log it. */
-			netdev_err(priv->dev,
-				   "%s: Tx Ring full when queue awake\n",
-				   __func__);
+			netdev_err(priv->dev, "%s: Tx Ring full when queue awake\n",
+					   __func__);
 		}
+		atomic_set(&priv->tx_lock, 0);
+
 		return NETDEV_TX_BUSY;
 	}
 
@@ -3330,12 +3539,17 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
 	stmmac_tx_timer_arm(priv, queue);
 
+	atomic_set(&priv->tx_lock, 0);
+
 	return NETDEV_TX_OK;
 
 dma_map_err:
 	netdev_err(priv->dev, "Tx DMA map failed\n");
 	dev_kfree_skb(skb);
 	priv->dev->stats.tx_dropped++;
+
+	atomic_set(&priv->tx_lock, 0);
+
 	return NETDEV_TX_OK;
 }
 
@@ -3667,9 +3881,14 @@ static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget)
 
 	priv->xstats.napi_poll++;
 
-	work_done = stmmac_rx(priv, budget, chan);
+    if (priv->xsk_umems && priv->xsk_umems[chan])
+            work_done = stmmac_rx_xsk(priv, budget, chan);
+    else
+            work_done = stmmac_rx(priv, budget, chan);
+
 	if (work_done < budget && napi_complete_done(napi, work_done))
 		stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
+
 	return work_done;
 }
 
@@ -3680,12 +3899,26 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
 	struct stmmac_priv *priv = ch->priv_data;
 	struct stmmac_tx_queue *tx_q;
 	u32 chan = ch->index;
-	int work_done;
+	int work_done = 0;
 
 	priv->xstats.napi_poll++;
 
-	work_done = stmmac_tx_clean(priv, DMA_TX_SIZE, chan);
-	work_done = min(work_done, budget);
+	if (priv->xsk_umems && priv->xsk_umems[chan]) {
+		stmmac_tx_clean(priv, DMA_TX_SIZE, chan);
+	} else {
+		work_done = stmmac_tx_clean(priv, DMA_TX_SIZE, chan);
+		work_done = min(work_done, budget);
+	}
+
+	/* If UMEM is used then start transmit with zero-copy */
+	if (!atomic_read(&priv->tx_lock)) {
+		atomic_set(&priv->tx_lock, 1);
+
+		if (priv->xsk_umems && priv->xsk_umems[chan])
+			work_done = stmmac_xdp_transmit_zc(priv, budget);
+
+		atomic_set(&priv->tx_lock, 0);
+	}
 
 	if (work_done < budget)
 		napi_complete_done(napi, work_done);
@@ -3712,7 +3945,6 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
 static void stmmac_tx_timeout(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-
 	stmmac_global_err(priv);
 }
 
@@ -4305,6 +4537,9 @@ static const struct net_device_ops stmmac_netdev_ops = {
 	.ndo_set_mac_address = stmmac_set_mac_address,
 	.ndo_vlan_rx_add_vid = stmmac_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid = stmmac_vlan_rx_kill_vid,
+	.ndo_bpf = stmmac_bpf,
+	.ndo_xdp_xmit = stmmac_xdp_xmit,
+	.ndo_xsk_wakeup = stmmac_xsk_wakeup,
 };
 
 static void stmmac_reset_subtask(struct stmmac_priv *priv)
@@ -4492,6 +4727,12 @@ int stmmac_dvr_probe(struct device *device,
 		return -ENOMEM;
 	}
 
+	priv->refill_wq = create_singlethread_workqueue("stmmac_refill_wq");
+	if (!priv->refill_wq) {
+		dev_err(priv->device, "failed to create refill workqueue\n");
+		return -ENOMEM;
+	}
+
 	INIT_WORK(&priv->service_task, stmmac_service_task);
 
 	/* Override with kernel parameters if supplied XXX CRS XXX
@@ -4693,6 +4934,7 @@ error_mdio_register:
 	}
 error_hw_init:
 	destroy_workqueue(priv->wq);
+	destroy_workqueue(priv->refill_wq);
 
 	return ret;
 }
@@ -4729,6 +4971,7 @@ int stmmac_dvr_remove(struct device *dev)
 	    priv->hw->pcs != STMMAC_PCS_RTBI)
 		stmmac_mdio_unregister(ndev);
 	destroy_workqueue(priv->wq);
+	destroy_workqueue(priv->refill_wq);
 	mutex_destroy(&priv->lock);
 
 	return 0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_xsk.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_xsk.c
new file mode 100644
index 0000000000000..1a6e488dac857
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_xsk.c
@@ -0,0 +1,1537 @@
+#include <linux/clk.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/skbuff.h>
+#include <linux/ethtool.h>
+#include <linux/if_ether.h>
+#include <linux/crc32.h>
+#include <linux/mii.h>
+#include <linux/if.h>
+#include <linux/if_vlan.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/prefetch.h>
+#include <linux/pinctrl/consumer.h>
+#include <net/xdp_sock.h>
+#include <net/xdp.h>
+#include "stmmac_xsk.h"
+
+/**
+ * stmmac_zca_free() - this function reuse UMEM memory again with dirty desc.
+ * @alloc: copy allocator structure
+ * @handle: pointer of UMEM memory
+ *
+ * Called in case of using ZERO copy memory model when convert to XDP frame.
+ */
+static void stmmac_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
+{
+	int queue = 0;
+	struct stmmac_priv *priv = container_of(alloc, struct stmmac_priv, zca);
+	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	unsigned int entry = rx_q->dirty_rx;
+	struct stmmac_xsk_desc_map *buf = &rx_q->desc_map[entry];
+	struct xdp_umem *umem = priv->xsk_umems[queue];
+	struct dma_desc *p;
+	u64 hr, mask;
+	size_t len;
+
+	DBG("%s-->\n", __FUNCTION__);
+
+	if (priv->extend_desc)
+		p = (struct dma_desc *)(rx_q->dma_erx + entry);
+	else
+		p = rx_q->dma_rx + entry;
+
+	hr = umem->headroom + XDP_PACKET_HEADROOM;
+	mask = umem->chunk_mask;
+	handle &= mask;
+
+	buf->dma_addr = xdp_umem_get_dma(umem, handle);
+	buf->dma_addr += hr;
+	buf->cpu_addr = xdp_umem_get_data(umem, handle);
+	buf->cpu_addr += hr;
+	buf->handle = xsk_umem_adjust_offset(umem, (u64)handle, umem->headroom);
+
+	len = priv->xsk_umems[queue]->chunk_size_nohr - XDP_PACKET_HEADROOM;
+	dma_sync_single_range_for_device(priv->device, buf->dma_addr,
+					 buf->page_offset, len,
+					 DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(priv->device, buf->dma_addr)) {
+		netdev_err(priv->dev, "Rx DMA map failed\n");
+		return;
+	}
+
+	stmmac_set_desc_addr(priv, p, buf->dma_addr);
+	stmmac_refill_desc3(priv, rx_q, p);
+
+	dma_wmb();
+	stmmac_set_rx_owner(priv, p, priv->use_riwt);
+	dma_wmb();
+
+	rx_q->dirty_rx = STMMAC_GET_ENTRY(entry, DMA_RX_SIZE);
+
+	DBG("%s<--\n", __FUNCTION__);
+}
+
+/**
+ * stmmac_alloc_frames_for_xsk() - allocate memory for XSK frames
+ * @priv: gemac main structure
+ * @queue: queue number of the net device
+ *
+ * Any XSK packet form UMEM can be converted to frame for use in stack or
+ * retransmit so allocate memory in advance.
+ */
+static int stmmac_alloc_frames_for_xsk(struct stmmac_priv *priv, int queue)
+{
+	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+	size_t len = priv->xsk_umems[queue]->chunk_size_nohr - XDP_PACKET_HEADROOM;
+	int err_i;
+	int i;
+
+	DBG("%s-->\n", __FUNCTION__);
+
+	for (i = 0; i < DMA_TX_SIZE; i++) {
+		tx_q->tx_skbuff_dma[i].page = kcalloc(1, len, GFP_KERNEL);
+		if (!tx_q->tx_skbuff_dma[i].page)
+			goto err;
+	}
+
+	DBG("%s<--\n", __FUNCTION__);
+
+	return 0;
+
+err:
+	pr_err("AF_XDP: can not allocate memory for XSK frames\n");
+
+	err_i = i;
+	for (i = 0; i < err_i; ++i)
+		kfree(tx_q->tx_skbuff_dma[i].page);
+
+	return -ENOMEM;
+}
+
+/**
+ * stmmac_free_frames_for_xsk() - free memory for XSK frames
+ * @priv: gemac main structure
+ * @queue: queue number of the net device
+ */
+static void stmmac_free_frames_for_xsk(struct stmmac_priv *priv, int queue)
+{
+	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+	int i;
+
+	DBG("%s-->\n", __FUNCTION__);
+
+	for (i = 0; i < DMA_TX_SIZE; i++)
+		kfree(tx_q->tx_skbuff_dma[i].page);
+
+	DBG("%s<--\n", __FUNCTION__);
+}
+
+/**
+ * stmmac_xmit_xdp_frame() - transmit one XSK frame
+ * @priv: gemac main structure
+ * @xdpf: pointer to the frame for transmitting
+ *
+ * Return: STMMAC_XDP_TX - ok	STMMAC_XDP_CONSUMED - failed
+ */
+static int stmmac_xmit_xdp_frame(struct stmmac_priv *priv, struct xdp_frame *xdpf)
+{
+	int queue = 0;
+	struct dma_desc *desc;
+	struct stmmac_tx_queue *tx_q = priv->tx_queue + queue;
+	int entry = tx_q->cur_tx;
+	dma_addr_t dma;
+	u32 tx_avail = stmmac_tx_avail(priv, queue);
+
+	if (atomic_read(&priv->tx_lock))
+		return STMMAC_XDP_CONSUMED;
+	else
+		atomic_set(&priv->tx_lock, 1);
+
+	if (!tx_avail)
+		goto err;
+
+	if (priv->extend_desc)
+		desc = (struct dma_desc *)(tx_q->dma_etx + entry);
+	else
+		desc = tx_q->dma_tx + entry;
+
+	memcpy(tx_q->tx_skbuff_dma[entry].page, xdpf->data, xdpf->len);
+	dma = dma_map_single(priv->device, tx_q->tx_skbuff_dma[entry].page,
+			     xdpf->len, DMA_TO_DEVICE);
+	if (dma_mapping_error(priv->device, dma))
+		goto err;
+
+	tx_q->cur_tx = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
+	tx_q->tx_skbuff_dma[entry].tx_source_type = STMMAC_TX_SOURCE_FRAME;
+	tx_q->tx_skbuff_dma[entry].buf = dma;
+	tx_q->tx_skbuff_dma[entry].len = xdpf->len;
+	tx_q->tx_skbuff_dma[entry].page_addr = xdpf->data;
+
+	stmmac_set_desc_addr(priv, desc, dma);
+	dma_wmb();
+	stmmac_prepare_tx_desc(priv, desc, 1, xdpf->len, 0, priv->mode, 1, 1,
+			       xdpf->len);
+	dma_wmb();
+	stmmac_enable_dma_transmission(priv, priv->ioaddr);
+
+	__free_page(virt_to_page(xdpf->data));
+
+	atomic_set(&priv->tx_lock, 0);
+
+	return STMMAC_XDP_TX;
+
+err:
+	__free_page(virt_to_page(xdpf->data));
+
+	atomic_set(&priv->tx_lock, 0);
+
+	return STMMAC_XDP_CONSUMED;
+}
+
+/**
+ * stmmac_xdp_transmit_zc() - transmit UMEM packets
+ * @priv: gemac main structure
+ * @napi_budget: budget to retransmit
+ *
+ * Main transmitting routine. Transmit packets got only from UMEM.
+ *
+ * Return: number of packets has been transmitted.
+ */
+int stmmac_xdp_transmit_zc(struct stmmac_priv *priv, int napi_budget)
+{
+	struct dma_desc *desc;
+	struct xdp_desc xdp_desc;
+	struct stmmac_tx_queue *tx_q = priv->tx_queue;
+	struct stmmac_tx_info *tx_info;
+	int entry = tx_q->cur_tx;
+	int csum_insertion = 0;
+	int queue = 0;
+	u32 desc_processed = 0;
+	u32 desc_avaliable = stmmac_tx_avail(priv, queue);
+	dma_addr_t dma;
+
+	/* Save batch of descriptors to ndo_xmit() */
+	if (desc_avaliable < STMMAC_TX_XMIT_SAFE_AMOUNT)
+		return 0;
+
+	while ((napi_budget-- > 0) && (desc_avaliable-- > 0)) {
+		/* Acquire data from UMEM */
+		if (!xsk_umem_consume_tx(priv->xsk_umems[queue], &xdp_desc))
+			break;
+
+		/* Get descriptor by index */
+		if (likely(priv->extend_desc))
+			desc = (struct dma_desc *)(tx_q->dma_etx + entry);
+		else
+			desc = tx_q->dma_tx + entry;
+
+		/* We use source type when clear Tx descriptors */
+		tx_info = tx_q->tx_skbuff_dma + entry;
+		tx_info->tx_source_type = STMMAC_TX_SOURCE_UMEM;
+
+		/* Prepare for use with GEMAC */
+		dma = xdp_umem_get_dma(priv->xsk_umems[queue], xdp_desc.addr);
+		dma_sync_single_for_device(priv->device, dma, xdp_desc.len,
+					   DMA_BIDIRECTIONAL);
+
+		/* Fill in descriptors with data */
+		stmmac_set_desc_addr(priv, desc, dma);
+		stmmac_prepare_tx_desc(priv, desc, 1, xdp_desc.len,
+				       csum_insertion, priv->mode, 1, 1,
+				       xdp_desc.len);
+
+		entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
+		tx_q->cur_tx = entry;
+
+		++desc_processed;
+	}
+
+	/* Notify socket in user space about written data */
+	if (desc_processed)
+		xsk_umem_consume_tx_done(priv->xsk_umems[queue]);
+
+	stmmac_enable_dma_transmission(priv, priv->ioaddr);
+
+	return desc_processed;
+}
+
+/**
+ * stmmac_rx_refill_xsk() - try to acquire descriptors for XSK from UMEM
+ * @priv: gemac main structure
+ * @queue: queue number of the net device
+ *
+ * Return: number of descriptors acquired form UMEM
+ */
+static int stmmac_rx_refill_xsk(struct stmmac_priv *priv, u32 queue)
+{
+	static atomic_t lock = ATOMIC_INIT(0);
+	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	struct stmmac_xsk_desc_map *buf;
+	struct xdp_umem *umem = NULL;
+	unsigned int entry = rx_q->dirty_rx;
+	unsigned long timeout = jiffies + msecs_to_jiffies(1);
+	int dirty = stmmac_rx_dirty(priv, queue);
+	int cleaned = 0;
+	u64 hr = 0;
+	u64 handle;
+
+	DBG("%s-->\n", __FUNCTION__);
+
+	if ((priv->xsk_umems == NULL) || (priv->xsk_umems[queue] == NULL))
+		return -EPERM;
+
+	umem = priv->xsk_umems[queue];
+	hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+	if (atomic_read(&lock))
+		return -EBUSY;
+
+	atomic_set(&lock, 1);
+
+	while (dirty-- > 0) {
+		struct dma_desc *p;
+		size_t buf_size;
+
+		/* Buffer info (extra data) is used for XSK */
+		buf = &rx_q->desc_map[entry];
+
+		if (priv->extend_desc)
+			p = (struct dma_desc *)(rx_q->dma_erx + entry);
+		else
+			p = rx_q->dma_rx + entry;
+
+		/* Acquire UMEM handle */
+		if (!xsk_umem_peek_addr(priv->xsk_umems[queue], &handle)) {
+			if (rx_q->rx_empty) {
+				/* Notify user space to clear RX queue and refill FQ queue */
+				if (xsk_umem_uses_need_wakeup(priv->xsk_umems[queue]))
+					xsk_set_rx_need_wakeup(priv->xsk_umems[queue]);
+
+				/* Try to acquire descriptors greedily */
+				if (time_after(jiffies, timeout))
+					break;
+				else
+					continue;
+			}
+
+			break;
+		}
+		dirty--;
+
+		buf->dma_addr = xdp_umem_get_dma(umem, handle);
+		buf->dma_addr += hr;
+		buf->cpu_addr = xdp_umem_get_data(umem, handle);
+		buf->cpu_addr += hr;
+		buf->handle = handle + umem->headroom;
+
+		/* Notify UMEM that we have taken one element */
+		xsk_umem_discard_addr(priv->xsk_umems[queue]);
+
+		rx_q->rx_empty = false;
+
+		buf_size = priv->xsk_umems[queue]->chunk_size_nohr - XDP_PACKET_HEADROOM;
+		dma_sync_single_range_for_device(priv->device, buf->dma_addr,
+						 buf->page_offset, buf_size,
+						 DMA_BIDIRECTIONAL);
+
+		if (dma_mapping_error(priv->device, buf->dma_addr)) {
+			netdev_err(priv->dev, "Rx DMA map failed\n");
+			break;
+		}
+
+		stmmac_set_desc_addr(priv, p, buf->dma_addr);
+		stmmac_refill_desc3(priv, rx_q, p);
+
+		if (rx_q->rx_zeroc_thresh > 0)
+			rx_q->rx_zeroc_thresh--;
+
+		dma_wmb();
+		stmmac_set_rx_owner(priv, p, priv->use_riwt);
+		dma_wmb();
+		entry = STMMAC_GET_ENTRY(entry, DMA_RX_SIZE);
+
+		++cleaned;
+	}
+	rx_q->dirty_rx = entry;
+
+	atomic_set(&lock, 0);
+
+	DBG("%s<--\n", __FUNCTION__);
+
+	return cleaned;
+}
+
+/**
+ * stmmac_initial_refill() - try to acquire descriptors for XSK from UMEM in
+ * initialization process.
+ * @priv: gemac main structure
+ * @queue: queue number of the net device
+ *
+ * Return: number of descriptors acquired form UMEM
+ */
+static int stmmac_initial_refill(struct stmmac_priv *priv, u32 queue)
+{
+	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	struct dma_desc *p;
+	struct xdp_umem *umem;
+	int result = 0;
+	int count = 0;
+	int i;
+	u64 hr;
+	u64 handle;
+	size_t len;
+
+	/* Check if UMEM is initialized */
+	if (priv->num_xsk_umems_used == 0)
+		return result;
+
+	umem = priv->xsk_umems[queue];
+	hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+	for (i = 0; i < DMA_RX_SIZE; ++i) {
+		/* Get descriptor pointer */
+		if (priv->extend_desc)
+			p = (struct dma_desc *)(rx_q->dma_erx + i);
+		else
+			p = rx_q->dma_rx + i;
+
+		/* Peek UMEM element */
+		if (!xsk_umem_peek_addr(priv->xsk_umems[queue], &handle))
+			break;
+
+		/* Place UMEM element to store */
+		rx_q->desc_map[i].dma_addr = xdp_umem_get_dma(umem, handle);
+		rx_q->desc_map[i].dma_addr += hr;
+		rx_q->desc_map[i].cpu_addr = xdp_umem_get_data(umem, handle);
+		rx_q->desc_map[i].cpu_addr += hr;
+		rx_q->desc_map[i].handle = handle + umem->headroom;
+
+		/* Notify UMEM that we take one element */
+		xsk_umem_discard_addr(priv->xsk_umems[queue]);
+
+		/* Sync DMA for use on device */
+		len = priv->xsk_umems[queue]->chunk_size_nohr - XDP_PACKET_HEADROOM;
+		dma_sync_single_range_for_device(priv->device,
+						 rx_q->desc_map[i].dma_addr,
+						 rx_q->desc_map[i].page_offset,
+						 len,
+						 DMA_BIDIRECTIONAL);
+
+		if (dma_mapping_error(priv->device, rx_q->desc_map[i].dma_addr)) {
+			netdev_err(priv->dev, "Rx DMA map failed\n");
+			break;
+		}
+
+		/* Setup DMA descriptor with new value */
+		stmmac_set_desc_addr(priv, p, rx_q->desc_map[i].dma_addr);
+		stmmac_refill_desc3(priv, rx_q, p);
+
+		dma_wmb();
+		stmmac_set_rx_owner(priv, p, priv->use_riwt);
+		dma_wmb();
+
+		++count;
+	}
+
+	if (count)
+		rx_q->rx_empty = false;
+
+	/* Setup ring descriptor pointers */
+	rx_q->cur_rx = 0;
+	rx_q->dirty_rx = count % DMA_RX_SIZE;
+
+	DBG("Ring pointers [cur:dirty] = [%u:%u]\n", rx_q->cur_rx, rx_q->dirty_rx);
+
+	/* This is unusual case just notify about it */
+	if (count < DMA_RX_SIZE)
+		pr_info("AF_XDP: Rx DMA ring is not filled completely %u of %u\n",
+			count, DMA_RX_SIZE);
+
+	return count;
+}
+
+/**
+ * stmmac_refill_timer() - timer routine for deferred refilling
+ * @t: associated timer
+ */
+static void stmmac_refill_timer(struct timer_list *t)
+{
+	struct stmmac_rx_queue *rx_q = from_timer(rx_q, t, rx_refill_timer);
+	struct stmmac_priv *priv = rx_q->priv_data;
+
+	stmmac_rx_refill_xsk(priv, rx_q->queue_index);
+
+	/* Timer can be adjusted to different time in rx_poll_xsk() if necessary */
+	mod_timer(&rx_q->rx_refill_timer,
+		  jiffies + msecs_to_jiffies(STMMAC_REFILL_MS));
+}
+
+/**
+ * stmmac_rx_timer() - timer routine to service Rx initialization and refilling
+ * @t: associated timer
+ *
+ * It can happens there are no UMEMs descriptors when initialization finish so
+ * run timer to try acquire UMEMs descriptors and finish ring initialization.
+ */
+static void stmmac_rx_timer(struct timer_list *t)
+{
+        struct stmmac_rx_queue *rx_q = from_timer(rx_q, t, rx_init_timer);
+        struct stmmac_priv *priv = rx_q->priv_data;
+        int is_refilled = 0;
+
+        DBG("%s-->\n", __FUNCTION__);
+
+        /* Try to do initial refill till it has succeed */
+        is_refilled = stmmac_initial_refill(priv, rx_q->queue_index);
+        if (!is_refilled) {
+        	mod_timer(&rx_q->rx_init_timer,
+        		  jiffies + msecs_to_jiffies(STMMAC_INITIAL_REFILL_MS));
+            return;
+        }
+
+        /* It helps to solve problem with first descriptor owing */
+        init_dma_rx_desc_rings_xsk(priv->dev);
+
+        pr_info("AF_XDP: started\n");
+    	stmmac_mac_set(priv, priv->ioaddr, true);
+    	stmmac_start_all_dma(priv);
+
+    	timer_setup(&rx_q->rx_refill_timer, stmmac_refill_timer, 0);
+    	mod_timer(&rx_q->rx_refill_timer,
+    		  jiffies + msecs_to_jiffies(STMMAC_REFILL_MS));
+
+    	DBG("%s<--\n", __FUNCTION__);
+}
+
+/**
+ * stmmac_init_rx_service_timer() - service timer initialization routine
+ * @priv: gemac main structure
+ */
+static void stmmac_init_rx_service_timer(struct stmmac_priv *priv)
+{
+	struct stmmac_rx_queue *rx_q = &priv->rx_queue[0];
+
+	timer_setup(&rx_q->rx_init_timer, stmmac_rx_timer, 0);
+	mod_timer(&rx_q->rx_init_timer, jiffies + msecs_to_jiffies(500));
+}
+
+/**
+ * stmmac_run_xdp_zc() - run XDP filter and make actions based on the result
+ * @priv: gemac main structure
+ * @xdp: buffer to make action on it
+ *
+ * Return: code of action made on xdp buffer
+ */
+static int stmmac_run_xdp_zc(struct stmmac_priv *priv, struct xdp_buff *xdp)
+{
+	struct bpf_prog *xdp_prog;
+	struct xdp_frame *xdpf;
+	int result = STMMAC_XDP_PASS;
+	int err;
+	u64 offset;
+	u32 act;
+
+	rcu_read_lock();
+
+	xdp_prog = READ_ONCE(priv->xdp_prog);
+	if (xdp_prog) {
+		act = bpf_prog_run_xdp(xdp_prog, xdp);
+	} else {
+		rcu_read_unlock();
+		return -1;
+	}
+
+	offset = xdp->data - xdp->data_hard_start;
+	xdp->handle = xsk_umem_adjust_offset(priv->xsk_umems[0], xdp->handle,
+					     offset);
+
+	switch (act) {
+	case XDP_PASS:
+		break;
+	case XDP_TX:
+		xdpf = convert_to_xdp_frame(xdp);
+		if (unlikely(!xdpf)) {
+			result = STMMAC_XDP_CONSUMED;
+			break;
+		}
+		result = stmmac_xmit_xdp_frame(priv, xdpf);
+		break;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(priv->dev, xdp, xdp_prog);
+		result = !err ? STMMAC_XDP_REDIR : STMMAC_XDP_CONSUMED;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* fall through */
+	case XDP_ABORTED:
+		trace_xdp_exception(priv->dev, xdp_prog, act);
+		/* fall through */
+	case XDP_DROP:
+		result = STMMAC_XDP_CONSUMED;
+		break;
+	}
+
+	rcu_read_unlock();
+
+	return result;
+}
+
+/**
+ * stmmac_rx_xsk() - main receiving packets routine. Rx NAPI registered routine.
+ * @priv: gemac main structure
+ * @limit: NAPI budget
+ * @queue: queue number of the net device
+ *
+ * This function can block (set flag) receive queue so there is no any receiving
+ * operation until queue will be refilled
+ *
+ * Return: numbers of received packets
+ */
+int stmmac_rx_xsk(struct stmmac_priv *priv, int limit, u32 queue)
+{
+	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	struct stmmac_channel *ch = &priv->channel[queue];
+	struct sk_buff *skb = NULL;
+	struct xdp_buff xdp;
+	unsigned int next_entry = rx_q->cur_rx;
+	unsigned int count = 0;
+	unsigned int error = 0;
+	unsigned int len = 0;
+	unsigned int xdp_res;
+	int status = 0;
+	int coe = priv->hw->rx_csum;
+	bool do_flush = false;
+
+	if (netif_msg_rx_status(priv)) {
+		void *rx_head;
+
+		netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__);
+		if (priv->extend_desc)
+			rx_head = (void *)rx_q->dma_erx;
+		else
+			rx_head = (void *)rx_q->dma_rx;
+
+		stmmac_display_ring(priv, rx_head, DMA_RX_SIZE, true);
+	}
+
+	while ((count < limit) && !rx_q->rx_empty) {
+		struct stmmac_xsk_desc_map *buf;
+		struct dma_desc *np, *p;
+		unsigned int sec_len;
+		unsigned int hlen = 0, prev_len = 0;
+		enum pkt_hash_types hash_type;
+		int entry;
+		u32 hash;
+
+		if (!count && rx_q->state_saved) {
+			skb = rx_q->state.skb;
+			error = rx_q->state.error;
+			len = rx_q->state.len;
+		} else {
+			rx_q->state_saved = false;
+			skb = NULL;
+			error = 0;
+			len = 0;
+		}
+
+		if (count >= limit)
+			break;
+
+read_again:
+		sec_len = 0;
+		entry = next_entry;
+		buf = rx_q->desc_map + entry;
+
+		if (priv->extend_desc)
+			p = (struct dma_desc *)(rx_q->dma_erx + entry);
+		else
+			p = rx_q->dma_rx + entry;
+
+		status = stmmac_rx_status(priv, &priv->dev->stats,
+					  &priv->xstats, p);
+
+		/* Check if descriptor is ready to use */
+		if (unlikely(status & dma_own))
+			break;
+
+		if (STMMAC_GET_ENTRY(rx_q->cur_rx, DMA_RX_SIZE) == rx_q->dirty_rx) {
+			/* There are no more owned and refilled descriptors.
+			 * All descriptors are read and queue is empty. Notify upper level.
+			 */
+			rx_q->rx_empty = true;
+		} else {
+			rx_q->cur_rx = STMMAC_GET_ENTRY(rx_q->cur_rx, DMA_RX_SIZE);
+			next_entry = rx_q->cur_rx;
+		}
+
+		if (priv->extend_desc)
+			np = (struct dma_desc *)(rx_q->dma_erx + next_entry);
+		else
+			np = rx_q->dma_rx + next_entry;
+
+		prefetch(np);
+		prefetch(buf->cpu_addr);
+
+		if (priv->extend_desc)
+			stmmac_rx_extended_status(priv, &priv->dev->stats,
+						  &priv->xstats,
+						  rx_q->dma_erx + entry);
+
+		if (unlikely(status == discard_frame)) {
+			error = 1;
+			if (!priv->hwts_rx_en)
+				priv->dev->stats.rx_errors++;
+		}
+
+		if (unlikely(error && (status & rx_not_ls)))
+			goto read_again;
+
+		if (unlikely(error)) {
+			dev_kfree_skb(skb);
+			count++;
+			continue;
+		}
+
+		/* Buffer is good. Go on. */
+
+		if (likely(status & rx_not_ls)) {
+			len += priv->dma_buf_sz;
+		} else {
+			prev_len = len;
+			len = stmmac_get_rx_frame_len(priv, p, coe);
+
+			/* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3
+			 * Type frames (LLC/LLC-SNAP)
+			 *
+			 * llc_snap is never checked in GMAC >= 4, so this ACS
+			 * feature is always disabled and packets need to be
+			 * stripped manually.
+			 */
+			if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00) ||
+				unlikely(status != llc_snap)) {
+					len -= ETH_FCS_LEN;
+			}
+		}
+
+		/* Sanity check */
+		if (!len)
+			continue;
+
+		/* It's time to run XDP program */
+		dma_sync_single_range_for_cpu(priv->device, buf->dma_addr,
+					      buf->page_offset, len,
+					      DMA_BIDIRECTIONAL);
+
+		xdp.rxq = &rx_q->xdp_rxq;
+		xdp.data = buf->cpu_addr;
+		xdp.data_meta = xdp.data;
+		xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
+		xdp.data_end = xdp.data + len;
+		xdp.handle = buf->handle;
+
+		xdp_res = stmmac_run_xdp_zc(priv, &xdp);
+		if ((xdp_res == STMMAC_XDP_REDIR)) {
+			count++;
+			do_flush = true;
+			continue;
+		} else if ((xdp_res == STMMAC_XDP_TX) || (xdp_res == STMMAC_XDP_CONSUMED)) {
+			count++;
+			continue;
+		}
+		/* Pass XDP packet forward to the network stack */
+
+		/* Allocate SKB if necessary */
+		if (!skb) {
+			int ret = stmmac_get_rx_header_len(priv, p, &hlen);
+
+			if (priv->sph && !ret && (hlen > 0)) {
+				sec_len = len;
+				if (!(status & rx_not_ls))
+					sec_len = sec_len - hlen;
+				len = hlen;
+
+				priv->xstats.rx_split_hdr_pkt_n++;
+			}
+
+			skb = napi_alloc_skb(&ch->rx_napi, len);
+			if (!skb) {
+				priv->dev->stats.rx_dropped++;
+				count++;
+				continue;
+			}
+
+			dma_sync_single_range_for_cpu(priv->device,
+						      buf->dma_addr,
+						      buf->page_offset, len,
+						      DMA_BIDIRECTIONAL);
+
+			skb_copy_to_linear_data(skb, buf->cpu_addr, len);
+			skb_put(skb, len);
+		} else {
+			unsigned int buf_len = len - prev_len;
+
+			if (likely(status & rx_not_ls))
+				buf_len = priv->dma_buf_sz;
+
+			dma_sync_single_range_for_cpu(priv->device,
+						      buf->dma_addr,
+						      buf->page_offset, len,
+						      DMA_BIDIRECTIONAL);
+
+			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+					buf->cpu_addr, 0, buf_len,
+					priv->dma_buf_sz);
+		}
+
+		if (likely(status & rx_not_ls))
+			goto read_again;
+
+		/* Got entire packet into SKB. Finish it. */
+		skb->protocol = eth_type_trans(skb, priv->dev);
+
+		if (unlikely(!coe))
+			skb_checksum_none_assert(skb);
+		else
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+		if (!stmmac_get_rx_hash(priv, p, &hash, &hash_type))
+			skb_set_hash(skb, hash, hash_type);
+
+		skb_record_rx_queue(skb, queue);
+		napi_gro_receive(&ch->rx_napi, skb);
+
+		priv->dev->stats.rx_packets++;
+		priv->dev->stats.rx_bytes += len;
+		count++;
+	}
+
+	if (status & rx_not_ls) {
+		rx_q->state_saved = true;
+		rx_q->state.skb = skb;
+		rx_q->state.error = error;
+		rx_q->state.len = len;
+	}
+
+	if (do_flush)
+		xdp_do_flush_map();
+
+	stmmac_rx_refill_xsk(priv, queue);
+	/* Make a decision whether we restart refilling and when */
+	if (stmmac_rx_dirty(priv, queue) > STMMAC_REFILL_GREEDILY_THRESHOLD) {
+		/* Notify user application we run out of descriptors */
+		if (xsk_umem_uses_need_wakeup(priv->xsk_umems[queue]))
+			xsk_set_rx_need_wakeup(priv->xsk_umems[queue]);
+
+		/* Start looking for descriptors in hard way */
+		mod_timer(&rx_q->rx_refill_timer,
+			  jiffies + msecs_to_jiffies(STMMAC_REFILL_GREEDILY_MS));
+	} else {
+		/* We don't want to notify user application to start
+		 * looking for descriptors in hard way so clear flag
+		 */
+		if (xsk_umem_uses_need_wakeup(priv->xsk_umems[queue]))
+			xsk_clear_rx_need_wakeup(priv->xsk_umems[queue]);
+
+		/* Just from time to time check if clearing go well */
+		mod_timer(&rx_q->rx_refill_timer,
+			  jiffies + msecs_to_jiffies(STMMAC_REFILL_MS));
+	}
+
+	priv->xstats.rx_pkt_n += count;
+
+	/* Sanity check. If it happens notify user and let the NAPI works  */
+	if (WARN_ONCE(count > limit, "NAPI return value higher than budget!\n"))
+		count = limit;
+
+	return count;
+}
+
+/**
+ * stmmac_remove_xsk_umem() - free UMEM resources
+ * @priv: gemac main structure
+ * @qid: queue number of the net device
+ */
+static void stmmac_remove_xsk_umem(struct stmmac_priv *priv, u16 qid)
+{
+	DBG("%s-->\n", __FUNCTION__);
+
+	priv->xsk_umems[qid] = NULL;
+	priv->num_xsk_umems_used--;
+
+	if (priv->num_xsk_umems == 0) {
+		kfree(priv->xsk_umems);
+		priv->xsk_umems = NULL;
+		priv->num_xsk_umems = 0;
+	}
+
+	DBG("%s<--\n", __FUNCTION__);
+}
+
+/**
+ * stmmac_alloc_xsk_umems() - alloc UMEM resources
+ * @priv: gemac main structure
+ *
+ * Return: 0 - ok	-ENOMEM - fail
+ */
+static int stmmac_alloc_xsk_umems(struct stmmac_priv *priv)
+{
+	if (priv->xsk_umems)
+		return 0;
+
+	DBG("%s-->\n", __FUNCTION__);
+
+	priv->num_xsk_umems_used = 0;
+	priv->num_xsk_umems = MTL_MAX_RX_QUEUES;
+	priv->xsk_umems = kcalloc(priv->num_xsk_umems, sizeof(*priv->xsk_umems),
+				  GFP_KERNEL);
+	if (!priv->xsk_umems) {
+		priv->num_xsk_umems = 0;
+		return -ENOMEM;
+	}
+
+	DBG("%s<--\n", __FUNCTION__);
+
+	return 0;
+}
+
+/**
+ * stmmac_add_xsk_umem() - add to UMEM auxiliary data
+ * @priv: gemac main structure
+ * @umem: allocated UMEM
+ * @qid: queue number of the net device
+ *
+ * Return: 0 - ok	-ENOMEM - fail
+ */
+static int stmmac_add_xsk_umem(struct stmmac_priv *priv, struct xdp_umem *umem,
+			       u16 qid)
+{
+	int err;
+
+	DBG("%s-->\n", __FUNCTION__);
+
+	err = stmmac_alloc_xsk_umems(priv);
+	if (err)
+		return err;
+
+	priv->xsk_umems[qid] = umem;
+	priv->num_xsk_umems_used++;
+
+	DBG("%s<--\n", __FUNCTION__);
+
+	return 0;
+}
+
+/**
+ * stmmac_xsk_umem_dma_map() - map DMA memory for UMEM descriptors
+ * @priv: gemac main structure
+ * @qid: associated UMEM
+ *
+ * Return: 0 - ok	-ENOMEM - fail
+ */
+static int stmmac_xsk_umem_dma_map(struct stmmac_priv *priv,
+				   struct xdp_umem *umem)
+{
+	struct device *dev = priv->device;
+	unsigned int i, j;
+	dma_addr_t dma;
+
+	DBG("%s-->\n", __FUNCTION__);
+
+	for (i = 0; i < umem->npgs; i++) {
+		dma = dma_map_page_attrs(dev, umem->pgs[i], 0, PAGE_SIZE,
+					 DMA_BIDIRECTIONAL, DMA_ATTR);
+		if (dma_mapping_error(dev, dma))
+			goto out_unmap;
+
+		umem->pages[i].dma = dma;
+	}
+
+	DBG("%s<--\n", __FUNCTION__);
+
+	return 0;
+
+out_unmap:
+	for (j = 0; j < i; j++) {
+		dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
+				     DMA_BIDIRECTIONAL, DMA_ATTR);
+		umem->pages[i].dma = 0;
+	}
+
+	return -ENOMEM;
+}
+
+/**
+ * stmmac_xdp_setup() - setup new XDP filter
+ * @dev: gemac main structure
+ * @prog: filter program
+ *
+ * Return: 0 - ok
+ */
+static int stmmac_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	struct bpf_prog *old_prog;
+
+	DBG("%s-->\n", __FUNCTION__);
+
+	synchronize_rcu();
+	old_prog = xchg(&priv->xdp_prog, prog);
+	rcu_assign_pointer(priv->xdp_prog, prog);
+
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	DBG("%s<--\n", __FUNCTION__);
+
+	return 0;
+}
+
+/**
+ * free_dma_rx_desc_resources_xsk() - free DMA descriptors for every ring
+ * @priv: gemac main structure
+ */
+void free_dma_rx_desc_resources_xsk(struct stmmac_priv *priv)
+{
+	u32 rx_count = priv->plat->rx_queues_to_use;
+	u32 queue;
+
+	DBG("%s-->\n", __FUNCTION__);
+
+	/* Free RX queue resources */
+	for (queue = 0; queue < rx_count; queue++) {
+		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+		/* Free DMA regions of consistent memory previously allocated */
+		if (!priv->extend_desc)
+			dma_free_coherent(priv->device,
+					  DMA_RX_SIZE * sizeof(struct dma_desc),
+					  rx_q->dma_rx, rx_q->dma_rx_phy);
+		else
+			dma_free_coherent(priv->device,
+					  DMA_RX_SIZE * sizeof(struct dma_extended_desc),
+					  rx_q->dma_erx, rx_q->dma_rx_phy);
+	}
+
+	DBG("%s<--\n", __FUNCTION__);
+}
+
+/**
+ * alloc_dma_rx_desc_resources_xsk() - allocate DMA descriptors for every ring
+ * @priv: gemac main structure
+ *
+ * Return: 0 - ok	-ENOMEM - fail
+ */
+int alloc_dma_rx_desc_resources_xsk(struct stmmac_priv *priv)
+{
+	u32 rx_count = priv->plat->rx_queues_to_use;
+	u32 queue;
+	u32 err_queue;
+	size_t len;
+
+	DBG("%s-->\n", __FUNCTION__);
+
+	/* RX queues buffers and DMA */
+	for (queue = 0; queue < rx_count; ++queue) {
+		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+		rx_q->queue_index = queue;
+		rx_q->priv_data = priv;
+
+		if (priv->extend_desc) {
+			len = DMA_RX_SIZE * sizeof(struct dma_extended_desc);
+			rx_q->dma_erx = dma_alloc_coherent(priv->device,
+							   len,
+							   &rx_q->dma_rx_phy,
+							   GFP_KERNEL);
+			if (!rx_q->dma_erx)
+				goto err_dma;
+		} else {
+			len = DMA_RX_SIZE * sizeof(struct dma_desc);
+			rx_q->dma_rx = dma_alloc_coherent(priv->device,
+							  len,
+							  &rx_q->dma_rx_phy,
+							  GFP_KERNEL);
+			if (!rx_q->dma_rx)
+				goto err_dma;
+		}
+	}
+
+	DBG("%s<--\n", __FUNCTION__);
+
+	return 0;
+
+err_dma:
+	pr_err("AF_XDP: Can not allocate DMA coherent memory!\n");
+
+	err_queue = queue;
+	for (queue = 0; queue < err_queue; ++queue) {
+		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+		if (priv->extend_desc) {
+			len = DMA_RX_SIZE * sizeof(struct dma_extended_desc);
+			dma_free_coherent(priv->device,
+					 len, rx_q->dma_erx, rx_q->dma_rx_phy);
+		} else {
+			len = DMA_RX_SIZE * sizeof(struct dma_desc);
+			dma_free_coherent(priv->device,
+					  len,  rx_q->dma_rx, rx_q->dma_rx_phy);
+		}
+	}
+
+	return -ENOMEM;
+}
+
+/**
+ * stmmac_txrx_ring_disable() - stop and free resources for ring. Stop DMA engine
+ * @priv: gemac main structure
+ * @ring: number of associated ring
+ */
+static void stmmac_txrx_ring_disable(struct stmmac_priv *priv, int ring)
+{
+	struct stmmac_channel *ch = &priv->channel[ring];
+	u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
+	u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
+	u32 maxq = max(rx_queues_cnt, tx_queues_cnt);
+
+	DBG("%s-->\n", __FUNCTION__);
+
+	/* Sanity check */
+	if (ring > maxq)
+		return;
+
+	/* Stop GEMAC engine */
+	stmmac_mac_set(priv, priv->ioaddr, false);
+	stmmac_stop_all_dma(priv);
+
+	/* Wait finishing last transactions */
+	msleep(100);
+
+	netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, ring));
+
+	/* Everything is ready to stop NAPIs */
+	if (ring < rx_queues_cnt)
+		napi_disable(&ch->rx_napi);
+	if (ring < tx_queues_cnt)
+		napi_disable(&ch->tx_napi);
+
+	if (priv->num_xsk_umems_used && priv->xsk_umems[ring]) {
+		/* Disable UMEM resources */
+		DBG("%s: UMEM memory model disable\n", __FUNCTION__);
+
+		xdp_do_flush_map();
+		xdp_rxq_info_unreg(&priv->rx_queue[ring].xdp_rxq);
+		stmmac_free_frames_for_xsk(priv, ring);
+		free_dma_rx_desc_resources_xsk(priv);
+	} else {
+		/* Disable resources in case of using pool of pages */
+		DBG("%s: page pool memory model disable\n", __FUNCTION__);
+
+		free_dma_rx_desc_resources(priv);
+	}
+
+	DBG("%s<--\n", __FUNCTION__);
+}
+
+/**
+ * stmmac_reset_watchdog_envent() - defer transmit time to avoid network schedule
+ * timeout.
+ * @priv: gemac main structure
+ * @ring: ring number
+ *
+ * Reset start time to allow acquire UMEM descriptors. It would be better to
+ * disable ring till it own UMEM but now it's made that way.
+ */
+static void stmmac_reset_watchdog_envent(struct stmmac_priv *priv, int ring)
+{
+	struct netdev_queue *txq;
+
+	txq = netdev_get_tx_queue(priv->dev, ring);
+	txq->trans_start = jiffies + priv->dev->watchdog_timeo;
+}
+
+/**
+ *
+ * stmmac_txrx_ring_enable() - allocate resources and run ring. Start DMA engine
+ * @priv: gemac main structure
+ * @ring: number of associated ring
+ *
+ * Return: 0 - ok	-ENOMEM - failure
+ */
+static int stmmac_txrx_ring_enable(struct stmmac_priv *priv, int ring)
+{
+	struct stmmac_channel *ch = &priv->channel[ring];
+	struct stmmac_rx_queue *rx_q = &priv->rx_queue[ring];
+	u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
+	u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
+	u32 maxq = max(rx_queues_cnt, tx_queues_cnt);
+	bool enable_gemac = false;
+
+	DBG("%s-->\n", __FUNCTION__);
+
+	/* Sanity check */
+	if (ring > maxq)
+		return -EPERM;
+
+	if (priv->num_xsk_umems_used && priv->xsk_umems[ring]) {
+		/* Allocate UMEM resources */
+		DBG("%s: UMEM memory model enable\n", __FUNCTION__);
+
+		priv->zca.free = stmmac_zca_free;
+		WARN_ON(xdp_rxq_info_reg_mem_model(&priv->rx_queue[ring].xdp_rxq,
+						   MEM_TYPE_ZERO_COPY,
+						   &priv->zca));
+
+		if (alloc_dma_rx_desc_resources_xsk(priv))
+			goto err;
+		stmmac_alloc_frames_for_xsk(priv, ring);
+
+		stmmac_init_rx_service_timer(priv);
+	} else {
+		/* Allocate resources in case of using pool of pages */
+		DBG("%s: page pool memory model enable\n", __FUNCTION__);
+
+		if (alloc_dma_rx_desc_resources(priv))
+			goto err;
+		init_dma_desc_rings(priv->dev, GFP_KERNEL);
+
+		/* Do restrict setup instead of full because driver isn't ready to run */
+		stmmac_hw_restrict_setup(priv->dev, true);
+
+		enable_gemac = true;
+	}
+
+	stmmac_reset_watchdog_envent(priv, ring);
+	netif_tx_start_queue(netdev_get_tx_queue(priv->dev, ring));
+
+	stmmac_init_rx_chan(priv, priv->ioaddr, priv->plat->dma_cfg,
+			    rx_q->dma_rx_phy, ring);
+
+	rx_q->rx_tail_addr = rx_q->dma_rx_phy +
+			     (DMA_RX_SIZE * sizeof(struct dma_desc));
+	stmmac_set_rx_tail_ptr(priv, priv->ioaddr,
+			       rx_q->rx_tail_addr, ring);
+
+	/* In case of UMEM this variables will be overridden in initial refill */
+	rx_q->cur_rx = 0;
+	rx_q->dirty_rx = 0;
+
+	/* Ready to start NAPIs */
+	if (ring < rx_queues_cnt)
+		napi_enable(&ch->rx_napi);
+	if (ring < tx_queues_cnt)
+		napi_enable(&ch->tx_napi);
+
+	/* Enable GEMAC engine here in case of using page pool */
+	if (enable_gemac) {
+		stmmac_mac_set(priv, priv->ioaddr, true);
+		stmmac_start_all_dma(priv);
+	}
+
+	DBG("%s<--\n", __FUNCTION__);
+
+	return 0;
+
+err:
+	pr_err("AF_XDP: can not enable ring %d\n", ring);
+	return -ENOMEM;
+}
+
+/**
+ * stmmac_umem_enable() - allocate resources and enable UMEM
+ * @priv: gemac main structure
+ * @umem: pointer to socket UMEM representation
+ * @qid: number of the queue to associate with
+ *
+ * Return: 0 - ok	< 0 - fail
+ */
+static int stmmac_umem_enable(struct stmmac_priv *priv, struct xdp_umem *umem,
+			      u16 qid)
+{
+	struct xdp_umem_fq_reuse *reuseq;
+	int err = -1;
+	bool if_running;
+
+	DBG("%s-->\n", __FUNCTION__);
+
+	if (qid >= priv->plat->rx_queues_to_use)
+		return -EINVAL;
+
+	err = xdp_rxq_info_reg(&priv->rx_queue[0].xdp_rxq, priv->dev, 0);
+	if (err)
+		return err;
+
+	reuseq = xsk_reuseq_prepare(DMA_RX_SIZE);
+	if (!reuseq)
+		return -ENOMEM;
+
+	if_running = netif_running(priv->dev);
+	if (if_running)
+		stmmac_txrx_ring_disable(priv, qid);
+
+	/* Setup UMEM and XDP auxiliary data */
+	if (stmmac_add_xsk_umem(priv, umem, qid))
+		return err;
+
+	xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
+
+	err = stmmac_xsk_umem_dma_map(priv, umem);
+	if (err)
+		return err;
+
+	priv->xsk_umems[qid] = umem;
+
+	/* Enable rings */
+	if (if_running)
+		stmmac_txrx_ring_enable(priv, qid);
+
+	DBG("%s<--\n", __FUNCTION__);
+
+	return 0;
+}
+
+/**
+ * stmmac_xsk_umem_dma_unmap() - free UMEM DMA resources
+ * @priv: gemac main structure
+ * @umem: associated UMEM
+ */
+static void stmmac_xsk_umem_dma_unmap(struct stmmac_priv *priv,
+				      struct xdp_umem *umem)
+{
+	struct device *dev = priv->device;
+	unsigned int i;
+
+	DBG("%s-->\n", __FUNCTION__);
+
+	for (i = 0; i < umem->npgs; i++) {
+		dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
+				     DMA_BIDIRECTIONAL, DMA_ATTR);
+		umem->pages[i].dma = 0;
+	}
+
+	DBG("%s<--\n", __FUNCTION__);
+}
+
+/**
+ * stmmac_umem_disable() - free resources and disable UMEM
+ * @priv: gemac main structure
+ * @qid: number of the queue to associate with
+ *
+ * Return: 0 - ok	< 0 - fail
+ */
+static int stmmac_umem_disable(struct stmmac_priv *priv, u16 qid)
+{
+	struct xdp_umem *umem;
+	struct stmmac_rx_queue *rx_q = &priv->rx_queue[qid];
+	bool if_running;
+
+	DBG("%s-->\n", __FUNCTION__);
+
+	umem = xdp_get_umem_from_qid(priv->dev, qid);
+	if (!umem)
+		return -EINVAL;
+
+	if_running = netif_running(priv->dev);
+
+	if (if_running)
+		stmmac_txrx_ring_disable(priv, qid);
+
+	stmmac_xsk_umem_dma_unmap(priv, umem);
+	stmmac_remove_xsk_umem(priv, qid);
+
+	del_timer_sync(&rx_q->rx_init_timer);
+	del_timer_sync(&rx_q->rx_refill_timer);
+
+	if (if_running)
+		stmmac_txrx_ring_enable(priv, qid);
+
+	priv->xsk_umems = NULL;
+
+	DBG("%s<--\n", __FUNCTION__);
+
+	return 0;
+}
+
+/**
+ * stmmac_umem_setup() - wrapper for enable/disable UMEM
+ * @priv: gemac main structure
+ * @umem: pointer to socket UMEM representation
+ * @qid: number of the associated queue
+ *
+ * Return: 0 - ok	< 0 - fail
+ */
+static int stmmac_umem_setup(struct stmmac_priv *priv, struct xdp_umem *umem,
+			     u16 qid)
+{
+	return umem ? stmmac_umem_enable(priv, umem, qid) : \
+		      stmmac_umem_disable(priv, qid);
+}
+
+/**
+ * stmmac_bpf() - callback of network stack for setup bpf or enable/disable UMEM
+ * @priv: gemac main structure
+ * @bpf: network stack representation of bpf
+ *
+ * Return: 0 - ok	< 0 - fail
+ */
+int stmmac_bpf(struct net_device *dev, struct netdev_bpf *bpf)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+
+	DBG("%s-->\n", __FUNCTION__);
+
+	switch (bpf->command) {
+		case XDP_SETUP_PROG:
+			if (!priv->xsk_umems) {
+				pr_err("AF_XDP: Copy mode is not supported\n");
+				return -EPERM;
+			}
+			return stmmac_xdp_setup(dev, bpf->prog);
+		case XDP_QUERY_PROG:
+			bpf->prog_id = priv->xdp_prog ? priv->xdp_prog->aux->id : 0;
+			return 0;
+		case XDP_SETUP_XSK_UMEM:
+			return stmmac_umem_setup(priv, bpf->xsk.umem,
+						 bpf->xsk.queue_id);
+		default:
+			return -EINVAL;
+	}
+
+	DBG("%s<--\n", __FUNCTION__);
+
+	return -EPERM;
+}
+
+/**
+ * stmmac_xdp_xmit() - do redirect to non mapped place as external network
+ * @dev: network device to transmit
+ * @n: number of XDP frames
+ * @xdp: pointer to xdp frames array
+ * @flags: extra flags from network stack
+ *
+ * Return: number of redirected frames
+ */
+int stmmac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdp,
+		    u32 flags)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	int drops = 0;
+	int result;
+	int i;
+
+	DBG("%s-->\n", __FUNCTION__);
+
+	for (i = 0; i < n; ++i) {
+		result = stmmac_xmit_xdp_frame(priv, xdp[i]);
+		if (result != STMMAC_XDP_TX) {
+			xdp_return_frame_rx_napi(xdp[i]);
+			drops++;
+		}
+	}
+
+	DBG("%s<--\n", __FUNCTION__);
+
+	return n - drops;
+}
+
+/**
+ * stmmac_xsk_wakeup() - Wake up Rx or/and Tx queue
+ * @dev: associated network device
+ * @queue_id: number of the queue
+ * @flags: sent action
+ *
+ * User space application or network stack can wake up driver in case of absence
+ * resource.
+ *
+ * Return: 0 - ok
+ */
+int stmmac_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+
+	DBG("%s-->\n", __FUNCTION__);
+
+	/* Wake up request can be sent from poll of socket */
+
+	if (flags & XDP_WAKEUP_TX)
+		/* Run NAPI tx engine to kick transfer or clean descriptors */
+		if (likely(napi_schedule_prep(&priv->channel[0].tx_napi))) {
+			__napi_schedule(&priv->channel[queue_id].tx_napi);
+			//xsk_clear_tx_need_wakeup(priv->xsk_umems[queue_id]);
+		}
+
+	if (flags & XDP_WAKEUP_RX)
+		/* Run NAPI rx engine to start receiving or clean descriptors */
+		if (likely(napi_schedule_prep(&priv->channel[0].rx_napi))) {
+			__napi_schedule(&priv->channel[queue_id].rx_napi);
+			//xsk_clear_rx_need_wakeup(priv->xsk_umems[queue_id]);
+		}
+
+	DBG("%s<--\n", __FUNCTION__);
+
+	return 0;
+}
+
+/**
+ * stmmac_init_dma_engine_xsk() - initialize DMA engine in case of using XSK
+ * @priv: gemac main structure
+ */
+int stmmac_init_dma_engine_xsk(struct stmmac_priv *priv)
+{
+	u32 rx_channels_count = priv->plat->rx_queues_to_use;
+	u32 tx_channels_count = priv->plat->tx_queues_to_use;
+	u32 dma_csr_ch = max(rx_channels_count, tx_channels_count);
+	struct stmmac_rx_queue *rx_q;
+	struct stmmac_tx_queue *tx_q;
+	u32 chan = 0;
+	int atds = 0;
+	int ret = 0;
+
+	if (!priv->plat->dma_cfg || !priv->plat->dma_cfg->pbl) {
+		dev_err(priv->device, "Invalid DMA configuration\n");
+		return -EINVAL;
+	}
+
+	if (priv->extend_desc && (priv->mode == STMMAC_RING_MODE))
+		atds = 1;
+
+	/* DMA Configuration */
+	stmmac_dma_init(priv, priv->ioaddr, priv->plat->dma_cfg, atds);
+
+	if (priv->plat->axi)
+		stmmac_axi(priv, priv->ioaddr, priv->plat->axi);
+
+	/* DMA CSR Channel configuration */
+	for (chan = 0; chan < dma_csr_ch; chan++)
+		stmmac_init_chan(priv, priv->ioaddr, priv->plat->dma_cfg, chan);
+
+	/* DMA RX Channel Configuration */
+	for (chan = 0; chan < rx_channels_count; chan++) {
+		rx_q = &priv->rx_queue[chan];
+
+		stmmac_init_rx_chan(priv, priv->ioaddr, priv->plat->dma_cfg,
+				    rx_q->dma_rx_phy, chan);
+
+		rx_q->rx_tail_addr = rx_q->dma_rx_phy +
+				     (DMA_RX_SIZE * sizeof(struct dma_desc));
+		stmmac_set_rx_tail_ptr(priv, priv->ioaddr,
+				       rx_q->rx_tail_addr, chan);
+	}
+
+	/* DMA TX Channel Configuration */
+	for (chan = 0; chan < tx_channels_count; chan++) {
+		tx_q = &priv->tx_queue[chan];
+
+		stmmac_init_tx_chan(priv, priv->ioaddr, priv->plat->dma_cfg,
+				    tx_q->dma_tx_phy, chan);
+
+		tx_q->tx_tail_addr = tx_q->dma_tx_phy;
+		stmmac_set_tx_tail_ptr(priv, priv->ioaddr,
+				       tx_q->tx_tail_addr, chan);
+	}
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_xsk.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_xsk.h
new file mode 100644
index 0000000000000..534c8c4e8db5f
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_xsk.h
@@ -0,0 +1,49 @@
+#ifndef _STMMAC_XSK_H_
+#define _STMMAC_XSK_H_
+
+#include <net/xdp_sock.h>
+#include <net/xdp.h>
+#include <linux/bpf_trace.h>
+#include "stmmac.h"
+
+//#define DEBUG_XSK
+#ifdef DEBUG_XSK
+#define DBG(...)	pr_info(__VA_ARGS__)
+#else
+#define DBG(...)
+#endif
+
+#define STMMAC_XDP_PASS		0
+#define STMMAC_XDP_CONSUMED	BIT(0)
+#define STMMAC_XDP_TX		BIT(1)
+#define STMMAC_XDP_REDIR	BIT(2)
+
+#define STMMAC_TX_SOURCE_RESET	0
+#define STMMAC_TX_SOURCE_SKB	0
+#define STMMAC_TX_SOURCE_UMEM	1
+#define STMMAC_TX_SOURCE_FRAME	2
+
+#define STMMAC_TX_XMIT_SAFE_AMOUNT		40
+
+/* Refill timer restart time */
+#define STMMAC_REFILL_GREEDILY_MS		1
+#define STMMAC_REFILL_MS			500
+#define STMMAC_INITIAL_REFILL_MS		500
+
+#define STMMAC_REFILL_GREEDILY_THRESHOLD	10
+
+#define DMA_ATTR \
+	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
+/* NDO prototypes */
+int stmmac_bpf(struct net_device *dev, struct netdev_bpf *bpf);
+int stmmac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdp, u32 flags);
+int stmmac_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
+
+/* Inner usage */
+inline u32 stmmac_tx_avail(struct stmmac_priv *priv, u32 queue);
+int stmmac_rx_xsk(struct stmmac_priv *priv, int limit, u32 queue);
+int stmmac_xdp_transmit_zc(struct stmmac_priv *priv, int napi_budget);
+int stmmac_init_dma_engine_xsk(struct stmmac_priv *priv);
+
+#endif
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index dcf2051ef2c04..38bacb33e9d51 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -139,6 +139,13 @@ config MDIO_GPIO
 	  To compile this driver as a module, choose M here: the module
 	  will be called mdio-gpio.
 
+config MDIO_GPIO_BAIKAL
+	tristate "GPIO lib-based bitbanged MDIO buses with OF support"
+	depends on MDIO_BITBANG
+	depends on GPIOLIB && OF
+	---help---
+	  Supports GPIO lib-based MDIO buses with OF support.
+
 config MDIO_HISI_FEMAC
 	tristate "Hisilicon FEMAC MDIO bus controller"
 	depends on HAS_IOMEM && OF_MDIO
@@ -258,6 +265,13 @@ config SFP
 	depends on HWMON || HWMON=n
 	select MDIO_I2C
 
+config 88X2222_PHY
+	tristate "Driver for Marvell 88X2222 Transceiver"
+	---help---
+	  Support for Marvell Integrated Dual-port
+	  Multi-speed Ethernet Transceivers.
+	  Currently supports 88x2222
+
 config ADIN_PHY
 	tristate "Analog Devices Industrial Ethernet PHYs"
 	help
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index a03437e091f3b..6450b979c40c1 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_MDIO_BUS_MUX_MMIOREG) += mdio-mux-mmioreg.o
 obj-$(CONFIG_MDIO_BUS_MUX_MULTIPLEXER) += mdio-mux-multiplexer.o
 obj-$(CONFIG_MDIO_CAVIUM)	+= mdio-cavium.o
 obj-$(CONFIG_MDIO_GPIO)		+= mdio-gpio.o
+obj-$(CONFIG_MDIO_GPIO_BAIKAL)	+= mdio-gpio-baikal.o
 obj-$(CONFIG_MDIO_HISI_FEMAC)	+= mdio-hisi-femac.o
 obj-$(CONFIG_MDIO_I2C)		+= mdio-i2c.o
 obj-$(CONFIG_MDIO_MOXART)	+= mdio-moxart.o
@@ -93,4 +94,5 @@ obj-$(CONFIG_SMSC_PHY)		+= smsc.o
 obj-$(CONFIG_STE10XP)		+= ste10Xp.o
 obj-$(CONFIG_TERANETICS_PHY)	+= teranetics.o
 obj-$(CONFIG_VITESSE_PHY)	+= vitesse.o
-obj-$(CONFIG_XILINX_GMII2RGMII) += xilinx_gmii2rgmii.o
+obj-$(CONFIG_XILINX_GMII2RGMII)	+= xilinx_gmii2rgmii.o
+obj-$(CONFIG_88X2222_PHY)	+= mv88x2222.o
diff --git a/drivers/net/phy/mdio-gpio-baikal.c b/drivers/net/phy/mdio-gpio-baikal.c
new file mode 100644
index 0000000000000..81a21662a69fb
--- /dev/null
+++ b/drivers/net/phy/mdio-gpio-baikal.c
@@ -0,0 +1,645 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Baikal Electronics SFP+ mezzanine card MDIO bus emulation using GPIO.
+ * Supports OpenFirmware.
+ *
+ * Based on Bitbanged MDIO support driver.
+ * drivers/net/phy/mdio-bitbang.c
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ * Copyright (c) 2007 Freescale Semiconductor
+ *
+ * Based on CPM2 MDIO code which is:
+ *
+ * Copyright (c) 2003 Intracom S.A.
+ *  by Pantelis Antoniou <panto@intracom.gr>
+ *
+ * 2005 (c) MontaVista Software, Inc.
+ * Vitaly Bordug <vbordug@ru.mvista.com>
+ *
+ * Paritaly based on GPIO based MDIO bitbang driver.
+ * drivers/net/phy/mdio-gpio.c
+ *
+ * Copyright (C) 2015-2022 Baikal Electronics, JSC
+ * Author: Dmitry Dunaev <dmitry.dunaev@baikalelectronics.ru>
+ */
+
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/mdio-bitbang.h>
+#include <linux/gpio.h>
+#include <linux/delay.h>
+#include <linux/mdio.h>
+#include <linux/phy.h>
+#include <linux/clk.h>
+
+#include <linux/of_device.h>
+#include <linux/of_gpio.h>
+#include <linux/of_mdio.h>
+
+#define MDIO_READ		2
+#define MDIO_WRITE		1
+
+#define MDIO_C45		(1 << 15)
+#define MDIO_C45_ADDR		(MDIO_C45 | 0)
+#define MDIO_C45_WRITE		(MDIO_C45 | 1)
+#define MDIO_C45_READ		(MDIO_C45 | 3)
+
+/*
+ * Minimum MDC period is 400 ns, plus some margin for error.
+ * MDIO_DELAY is done twice per period.
+ * Baikal SoC GPIO pins trigger clock is 1 MHz.
+ */
+#define MDIO_DELAY		14
+
+/*
+ * The PHY may take up to 300 ns to produce data, plus some margin for error.
+ * Baikal SoC GPIO pins trigger clock is 1 MHz.
+ */
+#define MDIO_READ_DELAY_US	10
+#define MDIO_RESET_DELAY_US	100
+
+/* Default GPIO trigger freq is 1 MHz */
+#define MDIO_TRIG_FREQ	1000000
+
+/* Basic driver function */
+struct baikal_mdio_data {
+	struct phy_device *phydev;
+	struct mii_bus *mii;
+	struct clk *clk;
+	int mdc, mdio, mdo, rst;
+	int mdc_active_low, mdio_active_low;
+	int mdo_active_low, rst_active_low;
+	unsigned int delay, read_delay, reset_delay;
+	/* PHY addresses to be ignored when probing */
+	unsigned int phy_mask;
+	/* IRQ mask */
+	int irqs[PHY_MAX_ADDR];
+};
+
+/* Physical level of MDIO bus */
+static inline void baikal_mdio_dir(struct baikal_mdio_data *data, int dir)
+{
+	if (data->mdo >= 0) {
+		/*
+		 * Separate output pin. Always set its value to high
+		 * when changing direction. If direction is input,
+		 * assume the pin serves as pull-up. If direction is
+		 * output, the default value is high.
+		 */
+		gpio_set_value(data->mdo, 1 ^ data->mdo_active_low);
+		return;
+	}
+
+	if (dir) {
+		gpio_direction_output(data->mdio, 1 ^ data->mdio_active_low);
+	} else {
+		gpio_direction_input(data->mdio);
+	}
+}
+
+static inline int baikal_mdio_get(struct baikal_mdio_data *data)
+{
+	return gpio_get_value(data->mdio) ^ data->mdio_active_low;
+}
+
+static inline void baikal_mdio_set(struct baikal_mdio_data *data, int what)
+{
+	if (data->mdo >= 0) {
+		gpio_set_value(data->mdo, what ^ data->mdo_active_low);
+	} else {
+		gpio_set_value(data->mdio, what ^ data->mdio_active_low);
+	}
+}
+
+static inline void baikal_mdc_set(struct baikal_mdio_data *data, int what)
+{
+	gpio_set_value(data->mdc, what ^ data->mdc_active_low);
+}
+
+/* Logical level of MDIO bus */
+
+/* MDIO must already be configured as output */
+static void baikal_mdio_send_bit(struct baikal_mdio_data *data, int val)
+{
+	baikal_mdio_set(data, val);
+	udelay(MDIO_DELAY);
+	baikal_mdc_set(data, 1);
+	udelay(MDIO_DELAY);
+	baikal_mdc_set(data, 0);
+}
+
+/* MDIO must already be configured as input */
+static int baikal_mdio_get_bit(struct baikal_mdio_data *data)
+{
+	udelay(MDIO_DELAY);
+	baikal_mdc_set(data, 1);
+	udelay(MDIO_DELAY);
+	baikal_mdc_set(data, 0);
+	return baikal_mdio_get(data);
+}
+
+/* MDIO must already be configured as output */
+static void baikal_mdio_send_num(struct baikal_mdio_data *data,
+				 u16 val, int bits)
+{
+	int i;
+
+	baikal_mdio_dir(data, 1);
+
+	for (i = bits - 1; i >= 0; i--) {
+		baikal_mdio_send_bit(data, (val >> i) & 1);
+	}
+}
+
+/* MDIO must already be configured as input */
+static u16 baikal_mdio_get_num(struct baikal_mdio_data *data, int bits)
+{
+	int i;
+	u16 ret = 0;
+
+	baikal_mdio_dir(data, 0);
+
+	for (i = bits - 1; i >= 0; i--) {
+		ret <<= 1;
+		ret |= baikal_mdio_get_bit(data);
+	}
+
+	return ret;
+}
+
+/*
+ * Utility to send the preamble, address, and
+ * register (common to read and write).
+ */
+static void baikal_mdio_cmd(struct baikal_mdio_data *data,
+			    int op, u8 phy, u8 reg)
+{
+	int i;
+
+	baikal_mdio_dir(data, 1);
+	/*
+	 * Send a 32 bit preamble ('1's) with an extra '1' bit for good
+	 * measure. The IEEE spec says this is a PHY optional
+	 * requirement. This means that we are doing more preambles
+	 * than we need, but it is safer and will be much more robust.
+	 */
+	for (i = 0; i < 32; i++) {
+		baikal_mdio_send_bit(data, 1);
+	}
+
+	/*
+	 * Send the start bit (01) and the read opcode (10) or write (10).
+	 * Clause 45 operation uses 00 for the start and 11, 10 for read/write.
+	 */
+	baikal_mdio_send_bit(data, 0);
+	if (op & MDIO_C45) {
+		baikal_mdio_send_bit(data, 0);
+	} else {
+		baikal_mdio_send_bit(data, 1);
+	}
+
+	baikal_mdio_send_bit(data, (op >> 1) & 1);
+	baikal_mdio_send_bit(data, (op >> 0) & 1);
+
+	baikal_mdio_send_num(data, phy, 5);
+	baikal_mdio_send_num(data, reg, 5);
+}
+
+/*
+ * In clause 45 mode all commands are prefixed by MDIO_ADDR to specify the
+ * lower 16 bits of the 21 bit address. This transfer is done identically
+ * to a MDIO_WRITE except for a different code. To enable clause 45 mode or
+ * MII_ADDR_C45 into the address. Theoretically clause 45 and normal devices
+ * can exist on the same bus. Normal devices should ignore the MDIO_ADDR phase.
+ */
+static int baikal_mdio_cmd_addr(struct baikal_mdio_data *data, int phy, u32 addr)
+{
+	unsigned int dev_addr = (addr >> 16) & 0x1f;
+	unsigned int reg = addr & 0xffff;
+
+	baikal_mdio_cmd(data, MDIO_C45_ADDR, phy, dev_addr);
+
+	/* send the turnaround (10) */
+	baikal_mdio_send_bit(data, 1);
+	baikal_mdio_send_bit(data, 0);
+
+	baikal_mdio_send_num(data, reg, 16);
+
+	baikal_mdio_dir(data, 0);
+	baikal_mdio_get_bit(data);
+
+	return dev_addr;
+}
+
+static int baikal_mdio_read(struct mii_bus *bus, int phy, int reg)
+{
+	struct baikal_mdio_data *data = bus->priv;
+	int ret, i;
+
+	if (reg & MII_ADDR_C45) {
+		reg = baikal_mdio_cmd_addr(data, phy, reg);
+		baikal_mdio_cmd(data, MDIO_C45_READ, phy, reg);
+	} else {
+		baikal_mdio_cmd(data, MDIO_READ, phy, reg);
+	}
+
+	baikal_mdio_dir(data, 0);
+
+	/* check the turnaround bit: the PHY should be driving it to zero */
+	if (baikal_mdio_get_bit(data) != 0) {
+		/*
+		 * PHY didn't drive TA low.
+		 * Flush any bits it may be trying to send.
+		 */
+		for (i = 0; i < 32; i++) {
+			baikal_mdio_get_bit(data);
+		}
+
+		return 0xffff;
+	}
+
+	ret = baikal_mdio_get_num(data, 16);
+	baikal_mdio_get_bit(data);
+	return ret;
+}
+
+static int baikal_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val)
+{
+	struct baikal_mdio_data *data = bus->priv;
+
+	if (reg & MII_ADDR_C45) {
+		reg = baikal_mdio_cmd_addr(data, phy, reg);
+		baikal_mdio_cmd(data, MDIO_C45_WRITE, phy, reg);
+	} else {
+		baikal_mdio_cmd(data, MDIO_WRITE, phy, reg);
+	}
+
+	/* send the turnaround (10) */
+	baikal_mdio_send_bit(data, 1);
+	baikal_mdio_send_bit(data, 0);
+
+	baikal_mdio_send_num(data, val, 16);
+
+	baikal_mdio_dir(data, 0);
+	baikal_mdio_get_bit(data);
+
+	return 0;
+}
+
+static int __maybe_unused baikal_mdio_reset(struct mii_bus *bus)
+{
+	struct baikal_mdio_data *data = bus->priv;
+
+	if (data->rst < 0) {
+		return 0;
+	}
+
+	gpio_set_value(data->rst, 1 ^ data->rst_active_low);
+	mdelay(data->reset_delay);
+
+	gpio_set_value(data->rst, 0 ^ data->rst_active_low);
+	mdelay(data->reset_delay);
+
+	return 0;
+}
+
+/* MDIO bus open firmware data */
+static void *baikal_mdio_of_get_data(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct baikal_mdio_data *pdata;
+	enum of_gpio_flags flags;
+	unsigned int freq = 0;
+	int ret;
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata) {
+		return NULL;
+	}
+
+	ret = of_get_named_gpio_flags(np, "mdc-pin", 0, &flags);
+	if (ret < 0) {
+		return NULL;
+	}
+
+	pdata->mdc = ret;
+	pdata->mdc_active_low = flags & OF_GPIO_ACTIVE_LOW;
+
+	ret = of_get_named_gpio_flags(np, "mdio-pin", 0, &flags);
+	if (ret < 0) {
+		return NULL;
+	}
+
+	pdata->mdio = ret;
+	pdata->mdio_active_low = flags & OF_GPIO_ACTIVE_LOW;
+
+	pdata->mdo = -1;
+	ret = of_get_named_gpio_flags(np, "mdo-pin", 0, &flags);
+	if (ret >= 0) {
+		pdata->mdo = ret;
+		pdata->mdo_active_low = flags & OF_GPIO_ACTIVE_LOW;
+	}
+
+	pdata->rst = -1;
+	ret = of_get_named_gpio_flags(np, "rst-pin", 0, &flags);
+	if (ret >= 0) {
+		pdata->rst = ret;
+		pdata->rst_active_low = flags & OF_GPIO_ACTIVE_LOW;
+	}
+
+	pdata->clk = of_clk_get(np, 0);
+
+	if (IS_ERR(pdata->clk)) {
+		of_property_read_u32(pdev->dev.of_node, "clock-frequency", &freq);
+	} else {
+		freq = clk_get_rate(pdata->clk);
+	}
+
+	if (!freq) {
+		freq = MDIO_TRIG_FREQ;
+	}
+
+	ret = 1000000 / freq;
+
+	pdata->read_delay  = ret > MDIO_READ_DELAY_US  ? ret : MDIO_READ_DELAY_US;
+	pdata->reset_delay = ret > MDIO_RESET_DELAY_US ? ret : MDIO_RESET_DELAY_US;
+
+	return pdata;
+}
+
+#ifdef CONFIG_ACPI
+static void *baikal_mdio_acpi_get_data(struct platform_device *pdev)
+{
+	struct baikal_mdio_data *pdata;
+	unsigned int freq;
+	int ret;
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata) {
+		return NULL;
+	}
+
+	if (device_property_read_u32(&pdev->dev, "clock-frequency", &freq)) {
+		freq = MDIO_TRIG_FREQ;
+	}
+
+	ret = 1000000 / freq;
+
+	pdata->read_delay  = ret > MDIO_READ_DELAY_US  ? ret : MDIO_READ_DELAY_US;
+	pdata->reset_delay = ret > MDIO_RESET_DELAY_US ? ret : MDIO_RESET_DELAY_US;
+
+	return pdata;
+}
+#else
+static void *baikal_mdio_acpi_get_data(struct platform_device *pdev)
+{
+	return NULL;
+}
+#endif
+
+/* MDIO bus init */
+static struct mii_bus *baikal_mdio_bus_init(struct device *dev,
+					    struct baikal_mdio_data *pdata,
+					    int bus_id)
+{
+	struct mii_bus *bus;
+	struct gpio_desc *desc;
+	int i;
+
+	bus = mdiobus_alloc();
+	if (!bus) {
+		dev_err(dev, "Unable to allocate MDIO bus\n");
+		goto error;
+	}
+
+	bus->read = baikal_mdio_read;
+	bus->write = baikal_mdio_write;
+	bus->priv = pdata;
+
+	bus->name = "Baikal GPIO MDIO bus";
+
+	bus->phy_mask = pdata->phy_mask;
+	memcpy(bus->irq, pdata->irqs, PHY_MAX_ADDR);
+	bus->parent = dev;
+
+	if (bus->phy_mask == ~0) {
+		dev_err(dev, "All PHY's are masked - nothing to attach\n");
+		goto error_free_bus;
+	}
+
+	for (i = 0; i < PHY_MAX_ADDR; i++) {
+		if (!bus->irq[i]) {
+			bus->irq[i] = PHY_POLL;
+		}
+	}
+
+	snprintf(bus->id, MII_BUS_ID_SIZE, "mdio-gpio%d", bus_id);
+
+	if (acpi_disabled) {
+		if (devm_gpio_request(dev, pdata->mdc, "mdc")) {
+			dev_err(dev, "MDC line (gpio%d) request failed\n", pdata->mdc);
+			goto error_free_bus;
+		}
+
+		if (devm_gpio_request(dev, pdata->mdio, "mdio")) {
+			dev_err(dev, "MDIO line (gpio%d) request failed\n", pdata->mdio);
+			goto error_free_bus;
+		}
+
+		if (pdata->mdo >= 0) {
+			if (devm_gpio_request(dev, pdata->mdo, "mdo")) {
+				goto error_free_bus;
+			}
+
+			gpio_direction_output(pdata->mdo, 1);
+			gpio_direction_input(pdata->mdio);
+		}
+
+		if (pdata->rst >= 0) {
+			if (devm_gpio_request(dev, pdata->rst, "rst")) {
+				pdata->rst= -1;
+			} else {
+				gpio_direction_output(pdata->rst, 0);
+				gpio_set_value(pdata->rst, 0);
+				mdelay(50);
+			}
+		}
+
+		gpio_direction_output(pdata->mdc, 0);
+	} else {
+		desc = devm_gpiod_get_index(dev, "mdc", 0, GPIOD_ASIS);
+		if (IS_ERR_OR_NULL(desc)) {
+			dev_err(dev, "MDC line request failed\n");
+			goto error_free_bus;
+		} else {
+			pdata->mdc = desc_to_gpio(desc);
+			pdata->mdc_active_low = gpiod_is_active_low(desc);
+			gpio_direction_output(pdata->mdc, 0);
+		}
+
+		desc = devm_gpiod_get_index(dev, "mdio", 0, GPIOD_ASIS);
+		if (IS_ERR_OR_NULL(desc)) {
+			dev_err(dev, "MDIO line request failed\n");
+			goto error_free_bus;
+		} else {
+			pdata->mdio = desc_to_gpio(desc);
+			pdata->mdio_active_low = gpiod_is_active_low(desc);
+		}
+
+		desc = devm_gpiod_get_index(dev, "mdo", 0, GPIOD_ASIS);
+		if (!IS_ERR_OR_NULL(desc)) {
+			pdata->mdo = desc_to_gpio(desc);
+			pdata->mdo_active_low = gpiod_is_active_low(desc);
+			gpio_direction_output(pdata->mdo, 1);
+			gpio_direction_input(pdata->mdio);
+		} else {
+			pdata->mdo = -1;
+		}
+
+		desc = devm_gpiod_get_index(dev, "rst", 0, GPIOD_ASIS);
+		if (!IS_ERR_OR_NULL(desc)) {
+			pdata->rst = desc_to_gpio(desc);
+			pdata->rst_active_low = gpiod_is_active_low(desc);
+			gpio_direction_output(pdata->rst, 0);
+		} else {
+			pdata->rst = -1;
+		}
+	}
+
+	dev_set_drvdata(dev, bus);
+	return bus;
+
+error_free_bus:
+	mdiobus_free(bus);
+error:
+	return NULL;
+}
+
+static int baikal_mdio_gpio_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct baikal_mdio_data *pdata;
+	struct fwnode_handle *child;
+	struct mii_bus *bus;
+	int ret, bus_id;
+
+	if (dev->of_node) {
+		pdata = baikal_mdio_of_get_data(pdev);
+		bus_id = of_alias_get_id(dev->of_node, "mdio-gpio");
+		if (bus_id < 0) {
+			dev_warn(dev, "failed to get alias id\n");
+			bus_id = 0;
+		}
+	} else if (!acpi_disabled) {
+		pdata = baikal_mdio_acpi_get_data(pdev);
+		ret = device_property_read_u32(dev, "bus-id", &bus_id);
+		if (ret || bus_id < 0) {
+			dev_warn(dev, "failed to get bus-id property\n");
+			bus_id = 0;
+		}
+	} else {
+		pdata = dev_get_platdata(dev);
+		bus_id = pdev->id;
+	}
+
+	if (!pdata) {
+		dev_err(dev, "No MDIO bus platform data\n");
+		return -ENODEV;
+	}
+
+	bus = baikal_mdio_bus_init(dev, pdata, bus_id);
+	if (!bus) {
+		dev_err(dev, "MDIO bus init failed\n");
+		return -ENODEV;
+	}
+
+	if (dev->of_node) {
+		ret = of_mdiobus_register(bus, dev->of_node);
+	} else if (!acpi_disabled) {
+		bus->phy_mask = ~0;
+		bus->dev.fwnode = dev->fwnode;
+		bus->reset_delay_us = 10;
+
+		ret = mdiobus_register(bus);
+	} else {
+		ret = mdiobus_register(bus);
+	}
+
+	if (ret) {
+		dev_err(dev, "MDIO bus register failed\n");
+		goto err_mdiobus_register;
+	}
+
+	if (!acpi_disabled) {
+		struct phy_device *phy;
+		u32 addr;
+
+		device_for_each_child_node(dev, child) {
+			ret = fwnode_property_read_u32(child, "reg", &addr);
+			if (ret < 0)
+				continue;
+
+			if (addr >= PHY_MAX_ADDR)
+				continue;
+
+			phy = get_phy_device(bus, addr, true);
+			if (IS_ERR(phy)) {
+				ret = PTR_ERR(phy);
+				mdiobus_unregister(bus);
+				goto err_mdiobus_register;
+			}
+
+			phy->irq = bus->irq[addr];
+			phy->mdio.dev.fwnode = child;
+
+			ret = phy_device_register(phy);
+			if (ret) {
+				phy_device_free(phy);
+				mdiobus_unregister(bus);
+				goto err_mdiobus_register;
+			}
+
+			dev_dbg(dev, "registered phy at address %i\n", addr);
+		}
+	}
+
+	pdata->mii = bus;
+	return 0;
+
+err_mdiobus_register:
+	mdiobus_free(bus);
+	return ret;
+}
+
+static int baikal_mdio_gpio_remove(struct platform_device *pdev)
+{
+	struct mii_bus *bus = dev_get_drvdata(&pdev->dev);
+	mdiobus_unregister(bus);
+	mdiobus_free(bus);
+	return 0;
+}
+
+static struct of_device_id baikal_mdio_gpio_of_match[] = {
+	{ .compatible = "baikal,mdio-gpio", },
+	{ }
+};
+
+static struct platform_driver baikal_mdio_gpio_driver = {
+	.probe = baikal_mdio_gpio_probe,
+	.remove = baikal_mdio_gpio_remove,
+	.driver		= {
+		.name	= "baikal-mdio-gpio",
+		.of_match_table = baikal_mdio_gpio_of_match,
+	},
+};
+
+module_platform_driver(baikal_mdio_gpio_driver);
+
+MODULE_ALIAS("platform:baikal-mdio-gpio");
+MODULE_AUTHOR("Dmitry Dunaev");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Baikal Electronics MDIO bus emulation using GPIO");
diff --git a/drivers/net/phy/mv88x2222.c b/drivers/net/phy/mv88x2222.c
new file mode 100644
index 0000000000000..c9c10cee2291e
--- /dev/null
+++ b/drivers/net/phy/mv88x2222.c
@@ -0,0 +1,503 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * drivers/net/phy/mv88x2222c
+ *
+ * Driver for Marvell Integrated Dual-port
+ * Multi-speed Ethernet Transceiver 88x2222
+ *
+ * Copyright (C) 2015-2022 Baikal Electronics, JSC
+ */
+
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/phy.h>
+#include <linux/gpio.h>
+#include <linux/delay.h>
+#include <linux/mdio.h>
+#include <linux/marvell_phy.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_gpio.h>
+
+/* 31.F002 Line side mode (ch.3.1.2, pg.46) */
+#define MV_MODE_LINE_SHF			8
+#define MV_MODE_LINE_10GBR			(0x71UL << 8)
+#define MV_MODE_LINE_10GBW			(0x74UL << 8)
+#define MV_MODE_LINE_2GBX_AN_OFF		(0x76UL << 8)
+#define MV_MODE_LINE_1GBR_AN_OFF		(0x72UL << 8)
+#define MV_MODE_LINE_1GBR_AN_ON			(0x73UL << 8)
+#define MV_MODE_LINE_SGMII_SYS_AN_OFF		(0x7cUL << 8)
+#define MV_MODE_LINE_SGMII_SYS_AN_ON		(0x7dUL << 8)
+#define MV_MODE_LINE_SGMII_NET_AN_OFF		(0x7eUL << 8)
+#define MV_MODE_LINE_SGMII_NET_AN_ON		(0x7fUL << 8)
+#define MV_MODE_LINE_DEFAULT			MV_MODE_LINE_10GBR
+#define MV_MODE_LINE_OF_NAME			"mv,line-mode"
+
+/* 31.F002 Host side mode (ch.3.1.2, pg.46) */
+#define MV_MODE_HOST_SHF			0
+#define MV_MODE_HOST_10GBR			(0x71UL << 0)
+#define MV_MODE_HOST_10GBX2			(0x72UL << 0)
+#define MV_MODE_HOST_10GBX4			(0x73UL << 0)
+#define MV_MODE_HOST_2GBX_AN_OFF		(0x76UL << 0)
+#define MV_MODE_HOST_1GBR_AN_OFF		(0x7aUL << 0)
+#define MV_MODE_HOST_1GBR_AN_ON			(0x7bUL << 0)
+#define MV_MODE_HOST_SGMII_SYS_AN_OFF		(0x7cUL << 0)
+#define MV_MODE_HOST_SGMII_SYS_AN_ON		(0x7dUL << 0)
+#define MV_MODE_HOST_SGMII_NET_AN_OFF		(0x7eUL << 0)
+#define MV_MODE_HOST_SGMII_NET_AN_ON		(0x7fUL << 0)
+#define MV_MODE_HOST_DEFAULT			MV_MODE_HOST_10GBR
+#define MV_MODE_HOST_OF_NAME			"mv,host-mode"
+
+/* 31.F402 Host side line muxing (ch.3.1.5, pg.48) */
+#define MV_ATT_10GBX2_SHF			11
+#define MV_ATT_10GBX2_LANE_0145			(0UL << 11)
+#define MV_ATT_10GBX2_LANE_0123			(1UL << 11)
+#define MV_ATT_10GBR_SHF			9
+#define MV_ATT_10GBR_LANE_0246			(0UL << 9)
+#define MV_ATT_10GBR_LANE_0123			(1UL << 9)
+#define MV_ATT_2GBR_SHF				8
+#define MV_ATT_2GBR_LANE_0246			(0UL << 8)
+#define MV_ATT_2GBR_LANE_0123			(1UL << 8)
+#define MV_ATT_1GBR_SHF				8
+#define MV_ATT_1GBR_LANE_0246			(0UL << 8)
+#define MV_ATT_1GBR_LANE_0123			(1UL << 8)
+#define MV_ATT_DEFAULT				0
+#define MV_ATT_OF_NAME				"mv,mux"
+
+/* 31.F003 Software reset (ch.3.2 pg.50) */
+#define MV_SW_RST_HOST_SHF			7
+#define MV_SW_RST_HOST				(1UL << 7)
+#define MV_SW_RST_LINE_SHF			15
+#define MV_SW_RST_LINE				(1UL << 15)
+#define MV_SW_RST_ALL				(MV_SW_RST_HOST | MV_SW_RST_LINE)
+
+/* 31.F012 GPIO data */
+#define MV_GPIO_TXDISABLE_DATA_SHF		8
+
+/* 31.F013 Tristate Control */
+#define MV_GPIO_TXDISABLE_OUTP_EN_SHF		8
+
+/* 31.F016 Interrupt type 3 */
+#define MV_GPIO_TXDISABLE_FN_SHF		3
+#define MV_GPIO_TXDISABLE_FN_GPIO		0x1
+
+/* Devices in package and registers */
+#define MV_DEV_10GBW_IRQ_ENABLE			0x8000
+#define MV_DEV_10GBW_IRQ_STATUS			0x8001
+#define MV_DEV_10GBW_IRQ_REALTIME		0x8002
+
+#define MV_DEV_10GBR_ANEG			0x2000
+#define MV_DEV_10GBR_IRQ_ENABLE			0x8000
+#define MV_DEV_10GBR_IRQ_STATUS			0x8001
+#define MV_DEV_10GBR_IRQ_REALTIME		0x8002
+
+#define MV_DEV_GBX_IRQ_ENABLE			0xa000
+#define MV_DEV_GBX_IRQ_STATUS			0xa001
+#define MV_DEV_GBX_IRQ_REALTIME			0xa002
+
+#define MV_DEV_MISC_IRQ_ENABLE			0xf00a
+#define MV_DEV_MISC_IRQ_STATUS			0xf00b
+
+#define MV_DEV_GPIO_DATA			0xf012
+#define MV_DEV_GPIO_TRISTATE_CTL		0xf013
+#define MV_DEV_GPIO_INTERRUPT_TYPE_3		0xf016
+
+#define MV_DEV_CHIP_HOST_LINE			0xf002
+#define MV_DEV_CHIP_RESET			0xf003
+#define MV_DEV_CHIP_MUX				0xf402
+#define MV_DEV_CHIP_IRQ_STATUS			0xf420
+#define MV_DEV_CHIP_IRQ_CONTROL			0xf421
+
+#define MV_RESET_DELAY_US			500
+
+struct mode {
+	unsigned mode_num;
+	char mode_name[16];
+};
+
+static struct mode line_modes[] = {
+	{MV_MODE_LINE_10GBR, "KR"},
+	{MV_MODE_LINE_10GBW, "10GBW"},
+	{MV_MODE_LINE_2GBX_AN_OFF, "2GBX_AN_OFF"},
+	{MV_MODE_LINE_1GBR_AN_OFF, "1GBR_AN_OFF"},
+	{MV_MODE_LINE_1GBR_AN_ON, "1GBR_AN_ON"},
+	{MV_MODE_LINE_SGMII_SYS_AN_OFF, "SGMII_SYS_AN_OFF"},
+	{MV_MODE_LINE_SGMII_SYS_AN_ON, "SGMI_SYS_AN_ON"},
+	{MV_MODE_LINE_SGMII_NET_AN_OFF, "SMGII_NET_AN_OFF"},
+	{MV_MODE_LINE_SGMII_NET_AN_ON, "SGMII_NET_AN_ON"}
+};
+
+static struct mode host_modes[] = {
+	{MV_MODE_HOST_10GBR, "KR"},
+	{MV_MODE_HOST_10GBX2, "10GBX2"},
+	{MV_MODE_HOST_10GBX4, "KX4"},
+	{MV_MODE_HOST_2GBX_AN_OFF, "2GBX_AN_OFF"},
+	{MV_MODE_HOST_1GBR_AN_OFF, "1GBR_AN_OFF"},
+	{MV_MODE_HOST_1GBR_AN_ON, "1GBR_AN_ON"},
+	{MV_MODE_HOST_SGMII_SYS_AN_OFF, "SGMII_SYS_AN_OFF"},
+	{MV_MODE_HOST_SGMII_SYS_AN_ON, "SGMII_SYS_AN_ON"},
+	{MV_MODE_HOST_SGMII_NET_AN_OFF, "SGMII_NE_AN_OFF"},
+	{MV_MODE_HOST_SGMII_NET_AN_ON, "SGMII_NET_AN_ON"}
+};
+
+struct mv88x2222_data {
+	int irq;
+	int rst_active_low, irq_active_low;
+	int line_mode, host_mode, mux;
+};
+
+static void *marvell_of_get_data(struct phy_device *phydev)
+{
+	struct device_node *np = phydev->mdio.dev.of_node;
+	struct mv88x2222_data *pdata;
+	enum of_gpio_flags flags;
+	int ret;
+	char mode[32];
+	unsigned int i = 0;
+	const char *pm = mode;
+
+	pdata = devm_kzalloc(&phydev->mdio.dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return NULL;
+
+	ret = of_get_named_gpio_flags(np, "irq-pin", 0, &flags);
+	if (ret >= 0) {
+		pdata->irq = ret;
+		pdata->irq_active_low = flags & OF_GPIO_ACTIVE_LOW;
+		dev_info(&phydev->mdio.dev, "irq gpio pin=%d", ret);
+	}
+
+	pdata->line_mode = MV_MODE_LINE_DEFAULT;
+	ret = of_property_read_string(np, MV_MODE_LINE_OF_NAME, &pm);
+	if (!ret) {
+		for (i = 0; i < sizeof(line_modes) / sizeof(struct mode); ++i) {
+			if (strcasecmp(line_modes[i].mode_name, pm) == 0) {
+				pdata->line_mode = line_modes[i].mode_num;
+				break;
+			}
+		}
+	}
+
+	pdata->host_mode = MV_MODE_HOST_DEFAULT;
+	ret = of_property_read_string(np, MV_MODE_HOST_OF_NAME, &pm);
+	if (!ret) {
+		for (i = 0; i < sizeof(host_modes) / sizeof(struct mode); ++i) {
+			if (strcasecmp(host_modes[i].mode_name, pm) == 0) {
+				pdata->host_mode = host_modes[i].mode_num;
+				break;
+			}
+		}
+	}
+
+	/* Default value at now */
+	pdata->mux = MV_ATT_DEFAULT;
+	return pdata;
+}
+
+#if CONFIG_ACPI
+static void *marvell_acpi_get_data(struct phy_device *phydev)
+{
+	struct device *dev = &phydev->mdio.dev;
+	struct mv88x2222_data *pdata;
+	int ret;
+	char mode[32];
+	unsigned int i = 0;
+	const char *pm = mode;
+
+	pdata = devm_kzalloc(&phydev->mdio.dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return NULL;
+
+	pdata->line_mode = MV_MODE_LINE_DEFAULT;
+	ret = device_property_read_string(dev, MV_MODE_LINE_OF_NAME, &pm);
+	if (!ret) {
+		for (i = 0; i < sizeof(line_modes) / sizeof(struct mode); ++i) {
+			if (strcasecmp(line_modes[i].mode_name, pm) == 0) {
+				pdata->line_mode = line_modes[i].mode_num;
+				break;
+			}
+		}
+	}
+
+	pdata->host_mode = MV_MODE_HOST_DEFAULT;
+	ret = device_property_read_string(dev, MV_MODE_HOST_OF_NAME, &pm);
+	if (!ret) {
+		for (i = 0; i < sizeof(host_modes) / sizeof(struct mode); ++i) {
+			if (strcasecmp(host_modes[i].mode_name, pm) == 0) {
+				pdata->host_mode = host_modes[i].mode_num;
+				break;
+			}
+		}
+	}
+
+	/* Default value at now */
+	pdata->mux = MV_ATT_DEFAULT;
+	return pdata;
+}
+#else
+static void *marvell_acpi_get_data(struct phy_device *phydev)
+{
+	return NULL;
+}
+#endif
+
+static int marvell_soft_reset(struct phy_device *phydev)
+{
+	int ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, MV_DEV_CHIP_RESET,
+				MV_SW_RST_ALL);
+	int count = 50;
+
+	if (ret) {
+		dev_warn(&phydev->mdio.dev, "software reset failed\n");
+		return ret;
+	}
+
+	do {
+		usleep_range(MV_RESET_DELAY_US, MV_RESET_DELAY_US + 100);
+		ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_DEV_CHIP_RESET);
+	} while ((ret & MV_SW_RST_ALL) || count--);
+
+	return 0;
+}
+
+static int marvell_config_init(struct phy_device *phydev)
+{
+	struct mv88x2222_data *pdata = phydev->priv;
+	int ret;
+
+	ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, MV_DEV_CHIP_HOST_LINE,
+			    pdata->line_mode | pdata->host_mode);
+	if (ret)
+		return 1;
+
+	phydev->speed = SPEED_10000;
+	phydev->duplex = DUPLEX_FULL;
+
+	/* This must be done after mode set */
+	ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_DEV_CHIP_HOST_LINE);
+	ret |= 0x8000;
+	phy_write_mmd(phydev, MDIO_MMD_VEND2, MV_DEV_CHIP_HOST_LINE, ret);
+
+	marvell_soft_reset(phydev);
+
+	dev_info(&phydev->mdio.dev, "phy(%d, %x)=%x\n", MDIO_MMD_VEND2,
+		 MV_DEV_CHIP_HOST_LINE,
+		 phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_DEV_CHIP_HOST_LINE));
+
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+			phydev->supported, 1);
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
+			phydev->supported, 1);
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
+			phydev->supported, 1);
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_Backplane_BIT,
+			phydev->supported, 1);
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
+			phydev->supported, 1);
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
+			phydev->supported, 1);
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported, 1);
+
+	phydev->pause = 0;
+	phydev->asym_pause = 0;
+	phydev->interface = PHY_INTERFACE_MODE_XGMII;
+	phydev->duplex = DUPLEX_FULL;
+
+	switch (pdata->line_mode) {
+	case MV_MODE_LINE_10GBR:
+	case MV_MODE_LINE_10GBW:
+		phydev->speed = SPEED_10000;
+		break;
+	case MV_MODE_LINE_2GBX_AN_OFF:
+		phydev->speed = SPEED_2500;
+		break;
+	default:
+		phydev->speed = SPEED_1000;
+		break;
+	}
+
+	return 0;
+}
+
+static int marvell_adjust_tx(struct phy_device *phydev)
+{
+	int reg;
+	int line_link = 1;
+
+	/* Switch tristate to "write to pin/read from register" */
+	reg = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_DEV_GPIO_TRISTATE_CTL);
+	phy_write_mmd(phydev, MDIO_MMD_VEND2, MV_DEV_GPIO_TRISTATE_CTL,\
+	reg | (1 << MV_GPIO_TXDISABLE_OUTP_EN_SHF));
+
+	/* Switch off TX_DISABLE */
+	reg = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_DEV_GPIO_DATA);
+	phy_write_mmd(phydev, MDIO_MMD_VEND2, MV_DEV_GPIO_DATA, reg & \
+		~(1 << MV_GPIO_TXDISABLE_DATA_SHF));
+
+	/* Check if opto-cable is plugged */
+	reg = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_STAT1);
+	if ((reg < 0) || !(reg & MDIO_STAT1_LSTATUS))
+		line_link = 0;
+
+	if (line_link) {
+		/* It's fine */
+		return 0;
+	} else {
+		/* Switch on TX_DISABLE */
+		reg = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_DEV_GPIO_DATA);
+		phy_write_mmd(phydev, MDIO_MMD_VEND2, MV_DEV_GPIO_DATA, reg | \
+			(1 << MV_GPIO_TXDISABLE_DATA_SHF));
+	}
+
+	return 1;
+}
+
+static int marvell_update_link(struct phy_device *phydev)
+{
+	int reg;
+	int host_mode = 0;
+	int line_mode = 0;
+
+	/* Default link status */
+	phydev->link = 1;
+
+	reg = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_DEV_CHIP_HOST_LINE);
+	if (reg < 0) {
+		phydev->link = 0;
+		return 0;
+	}
+
+	host_mode = reg & 0x007f;
+	line_mode = reg & 0x7f00;
+
+	/* Read host link status */
+	if (host_mode == MV_MODE_HOST_10GBX4)
+		reg = phy_read_mmd(phydev, MDIO_MMD_PHYXS, 0x1001);
+	else
+		reg = phy_read_mmd(phydev, MDIO_MMD_PHYXS, MDIO_STAT1);
+
+	if ((reg < 0) || !(reg & MDIO_STAT1_LSTATUS))
+		phydev->link = 0;
+
+	/* Read line link status */
+	if (line_mode == MV_MODE_LINE_10GBR)
+		reg = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_STAT1);
+	else
+		reg = phy_read_mmd(phydev, MDIO_MMD_PCS, 0x2001);
+
+	if ((reg < 0) || !(reg & MDIO_STAT1_LSTATUS))
+		phydev->link = 0;
+
+	/*
+	 * PMAPMD link status is always broken
+	 * later we need to update this driver.
+	 */
+	reg = marvell_adjust_tx(phydev);
+	if (reg < 0)
+		phydev->link = 0;
+
+	return 0;
+}
+
+static int marvell_read_status(struct phy_device *phydev)
+{
+	int reg;
+
+	/* Update the link, but return if there was an error */
+	reg = marvell_update_link(phydev);
+	if (reg < 0)
+		return reg;
+
+	/* Read line control reg */
+	reg = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1);
+	if (reg < 0)
+		return reg;
+
+	return 0;
+}
+
+static int marvell_config_aneg(struct phy_device *phydev)
+{
+	linkmode_copy(phydev->advertising, phydev->supported);
+	return 0;
+}
+
+static int marvell_probe(struct phy_device *phydev)
+{
+	struct mv88x2222_data *pdata = NULL;
+	int reg = 0;
+
+	if (phydev->mdio.dev.of_node)
+		pdata = marvell_of_get_data(phydev);
+	else if (!acpi_disabled)
+		pdata = marvell_acpi_get_data(phydev);
+
+	if (!pdata) {
+		dev_err(&phydev->mdio.dev, "No PHY platform data\n");
+		return -ENODEV;
+	}
+
+	phydev->priv = pdata;
+	dev_info(&phydev->mdio.dev, "probed %s at 0x%02x\n",
+		 phydev->drv->name, phydev->mdio.addr);
+
+	reg = phy_read_mmd(phydev, MDIO_MMD_PCS, 0x0002);
+	return 0;
+}
+
+static int marvell_suspend(struct phy_device *phydev)
+{
+	int reg;
+	mutex_lock(&phydev->lock);
+
+	/* Switch tristate to "write to pin/read from register" */
+	reg = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_DEV_GPIO_TRISTATE_CTL);
+	phy_write_mmd(phydev, MDIO_MMD_VEND2, MV_DEV_GPIO_TRISTATE_CTL,\
+	reg | (1 << MV_GPIO_TXDISABLE_OUTP_EN_SHF));
+
+	/* Switch on TX_DISABLE */
+	reg = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_DEV_GPIO_DATA);
+	phy_write_mmd(phydev, MDIO_MMD_VEND2, MV_DEV_GPIO_DATA, reg | \
+		(1 << MV_GPIO_TXDISABLE_DATA_SHF));
+	/* TBD Probably switch to lowpower mode */
+
+	mutex_unlock(&phydev->lock);
+	return 0;
+}
+
+static int marvell_match_phy_device(struct phy_device *phydev)
+{
+	unsigned phy_id;
+
+	phy_id = phydev->c45_ids.device_ids[MDIO_MMD_PCS] & MARVELL_PHY_ID_MASK;
+
+	return (phy_id == MARVELL_PHY_ID_88X2222) ||
+	       (phy_id == MARVELL_PHY_ID_88X2222R);
+}
+
+static struct phy_driver marvell_drivers[] = {
+	{
+		.phy_id = MARVELL_PHY_ID_88X2222,
+		.phy_id_mask = MARVELL_PHY_ID_MASK,
+		.name = "Marvell 88X2222",
+		.features = 0,
+		.config_init = marvell_config_init,
+		.config_aneg = marvell_config_aneg,
+		.probe = marvell_probe,
+		.match_phy_device = marvell_match_phy_device,
+		.read_status = marvell_read_status,
+		.soft_reset = marvell_soft_reset,
+		.resume = genphy_resume,
+		.suspend = marvell_suspend,
+	},
+};
+module_phy_driver(marvell_drivers);
+
+static struct mdio_device_id __maybe_unused marvell_tbl[] = {
+	{ MARVELL_PHY_ID_88X2222, MARVELL_PHY_ID_MASK },
+	{ MARVELL_PHY_ID_88X2222R, MARVELL_PHY_ID_MASK },
+	{ }
+};
+MODULE_DEVICE_TABLE(mdio, marvell_tbl);
+MODULE_DESCRIPTION("Marvell Ethernet Transceiver driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
index 0ba988b5b5bc6..647b1b9e93869 100644
--- a/drivers/pci/controller/dwc/Kconfig
+++ b/drivers/pci/controller/dwc/Kconfig
@@ -209,6 +209,37 @@ config PCIE_ARTPEC6_EP
 	  Enables support for the PCIe controller in the ARTPEC-6 SoC to work in
 	  endpoint mode. This uses the DesignWare core.
 
+config PCIE_BAIKAL
+	bool
+
+config PCIE_BAIKAL_HOST
+	bool "Baikal SoCs PCIe controller - Host Mode"
+	depends on ARCH_BAIKAL
+	depends on (OF || (ACPI && PCI_QUIRKS)) && HAS_IOMEM
+	depends on PCI_MSI_IRQ_DOMAIN
+	select PCIE_DW_HOST
+	select PCIE_BAIKAL
+	help
+	  Enables support for the PCIe controller in Baikal SoCs to work in
+	  host mode. There are two instances of PCIe controller in Baikal SoCs.
+	  This controller can work either as EP or RC. In order to enable
+	  host-specific features PCIE_BAIKAL_HOST must be selected and in order
+	  to enable device-specific features PCIE_BAIKAL_EP must be selected.
+	  This uses the DesignWare core.
+
+config PCIE_BAIKAL_EP
+	bool "Baikal SoCs PCIe controller - Endpoint Mode"
+	depends on PCI_ENDPOINT
+	select PCIE_DW_EP
+	select PCIE_BAIKAL
+	help
+	  Enables support for the PCIe controller in Baikal SoCs to work in
+	  host mode. There are two instances of PCIe controller in Baikal SoCs.
+	  This controller can work either as EP or RC. In order to enable
+	  host-specific features PCIE_BAIKAL_HOST must be selected and in order
+	  to enable device-specific features PCIE_BAIKAL_EP must be selected.
+	  This uses the DesignWare core.
+
 config PCIE_KIRIN
 	depends on OF && (ARM64 || COMPILE_TEST)
 	bool "HiSilicon Kirin series SoCs PCIe controllers"
diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
index 69faff371f118..355b5a093b4b3 100644
--- a/drivers/pci/controller/dwc/Makefile
+++ b/drivers/pci/controller/dwc/Makefile
@@ -13,12 +13,16 @@ obj-$(CONFIG_PCI_LAYERSCAPE_EP) += pci-layerscape-ep.o
 obj-$(CONFIG_PCIE_QCOM) += pcie-qcom.o
 obj-$(CONFIG_PCIE_ARMADA_8K) += pcie-armada8k.o
 obj-$(CONFIG_PCIE_ARTPEC6) += pcie-artpec6.o
+obj-$(CONFIG_PCIE_BAIKAL) += pcie-baikal.o
 obj-$(CONFIG_PCIE_KIRIN) += pcie-kirin.o
 obj-$(CONFIG_PCIE_HISI_STB) += pcie-histb.o
 obj-$(CONFIG_PCI_MESON) += pci-meson.o
 obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
 obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
 
+pcie-baikal-objs := pcie-baikal-core.o \
+                    pcie-baikal-tune.o
+
 # The following drivers are for devices that use the generic ACPI
 # pci_root.c driver but don't support standard ECAM config access.
 # They contain MCFG quirks to replace the generic ECAM accessors with
@@ -32,4 +36,5 @@ obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
 ifdef CONFIG_PCI
 obj-$(CONFIG_ARM64) += pcie-al.o
 obj-$(CONFIG_ARM64) += pcie-hisi.o
+obj-$(CONFIG_ARM64) += pcie-baikal-acpi.o
 endif
diff --git a/drivers/pci/controller/dwc/pcie-baikal-acpi.c b/drivers/pci/controller/dwc/pcie-baikal-acpi.c
new file mode 100644
index 0000000000000..5cdd8c98ad205
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-baikal-acpi.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021-2022 Baikal Electronics, JSC
+ * Author: Aleksandr Efimov <alexander.efimov@baikalelectronics.ru>
+ */
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
+
+#include <linux/pci-ecam.h>
+
+#if defined(CONFIG_PCIE_BAIKAL)
+extern struct pci_ecam_ops baikal_m_pcie_ecam_ops;
+extern struct pci_ecam_ops baikal_s_pcie_ecam_ops;
+#else
+struct pci_ecam_ops baikal_m_pcie_ecam_ops = {
+	.bus_shift	= 20,
+	.pci_ops	= {
+		.map_bus	= pci_ecam_map_bus,
+		.read		= pci_generic_config_read,
+		.write		= pci_generic_config_write
+	}
+};
+
+struct pci_ecam_ops baikal_s_pcie_ecam_ops = {
+	.bus_shift	= 20,
+	.pci_ops	= {
+		.map_bus	= pci_ecam_map_bus,
+		.read		= pci_generic_config_read,
+		.write		= pci_generic_config_write
+	}
+};
+#endif
+
+#endif
diff --git a/drivers/pci/controller/dwc/pcie-baikal-core.c b/drivers/pci/controller/dwc/pcie-baikal-core.c
new file mode 100644
index 0000000000000..a40b10ac0d038
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-baikal-core.c
@@ -0,0 +1,2300 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe controller driver for Baikal Electronics SoCs
+ *
+ * Copyright (C) 2019-2023 Baikal Electronics, JSC
+ * Authors: Pavel Parkhomenko <pavel.parkhomenko@baikalelectronics.ru>
+ *          Aleksandr Efimov <alexander.efimov@baikalelectronics.ru>
+ */
+
+#include <linux/acpi.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of_device.h>
+#include <linux/of_gpio.h>
+#include <linux/pci.h>
+#include <linux/pci-ecam.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include "pcie-designware.h"
+#include "pcie-baikal.h"
+
+#define BAIKAL_PCIE_LTSSM_MASK		0x3f
+#define BAIKAL_PCIE_LTSSM_STATE_L0	0x11
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
+struct baikal_pcie_acpi_data {
+	u64		mem_base;
+	phys_addr_t	mem_bus_addr;
+	u32		mem_size;
+};
+#else
+static inline u16 bm1000_pcie_get_segment_acpi(struct device *dev)
+{
+	return 0;
+}
+
+static inline void bm1000_pcie_setup_rc_acpi(struct pcie_port *pp)
+{
+}
+
+static inline void bs1000_pcie_setup_rc_acpi(struct pcie_port *pp)
+{
+}
+#endif
+
+struct baikal_pcie_of_data {
+	enum dw_pcie_device_mode	mode;
+	struct dw_pcie_ops		dw_pcie_ops;
+	int (*get_resources)(struct platform_device *pdev,
+			     struct dw_pcie *pci,
+			     const struct baikal_pcie_of_data *data);
+	int (*add_pcie_port)(struct platform_device *pdev);
+};
+
+static const char *baikal_pcie_link_speed_str(const unsigned speed)
+{
+	static const char *speed_str[] = {"2.5", "5.0", "8.0", "16.0"};
+
+	if (speed > 0 && speed <= ARRAY_SIZE(speed_str))
+		return speed_str[speed - 1];
+
+	return "???";
+}
+
+static void baikal_pcie_link_print_status(struct dw_pcie *pci)
+{
+	struct device *dev = pci->dev;
+	u16 exp_cap_off;
+	u32 reg;
+
+	if (!pci->ops->link_up(pci)) {
+		dev_info(dev, "link is down\n");
+		return;
+	}
+
+	exp_cap_off = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	reg = dw_pcie_readw_dbi(pci, exp_cap_off + PCI_EXP_LNKSTA);
+
+	dev_info(dev, "current link is %s GT/s%s, x%u\n",
+		 baikal_pcie_link_speed_str(FIELD_GET(PCI_EXP_LNKSTA_CLS, reg)),
+		 FIELD_GET(PCI_EXP_LNKSTA_LT, reg) ? " (training)" : "",
+		 FIELD_GET(PCI_EXP_LNKSTA_NLW, reg));
+}
+
+static bool baikal_pcie_link_is_training(struct dw_pcie *pci)
+{
+	u16 exp_cap_off;
+	u32 reg;
+
+	exp_cap_off = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	reg = dw_pcie_readw_dbi(pci, exp_cap_off + PCI_EXP_LNKSTA);
+	return FIELD_GET(PCI_EXP_LNKSTA_LT, reg);
+}
+
+static bool baikal_pcie_link_wait_training_done(struct dw_pcie *pci)
+{
+	struct device *dev = pci->dev;
+	unsigned long start_jiffies = jiffies;
+
+	while (baikal_pcie_link_is_training(pci)) {
+		if (time_after(jiffies, start_jiffies + HZ)) {
+			dev_err(dev, "link training timeout occured\n");
+			return false;
+		}
+
+		udelay(100);
+	}
+
+	return true;
+}
+
+#define BM1000_PCIE_GPR_RESET_BASE		0x00
+#define BM1000_PCIE_GPR_RESET(x)		((x * 0x20) + BM1000_PCIE_GPR_RESET_BASE)
+#define BM1000_PCIE_PHY_RST			BIT(0)
+#define BM1000_PCIE_PIPE_RST			BIT(4) /* x4 controllers only */
+#define BM1000_PCIE_PIPE0_RST			BIT(4) /* x8 controller only */
+#define BM1000_PCIE_PIPE1_RST			BIT(5) /* x8 controller only */
+#define BM1000_PCIE_CORE_RST			BIT(8)
+#define BM1000_PCIE_PWR_RST			BIT(9)
+#define BM1000_PCIE_STICKY_RST			BIT(10)
+#define BM1000_PCIE_NONSTICKY_RST		BIT(11)
+#define BM1000_PCIE_HOT_RST			BIT(12)
+#define BM1000_PCIE_ADB_PWRDWN			BIT(13)
+
+#define BM1000_PCIE_GPR_STATUS_BASE		0x04
+#define BM1000_PCIE_GPR_STATUS(x)		((x * 0x20) + BM1000_PCIE_GPR_STATUS_BASE)
+
+#define BM1000_PCIE_GPR_GENCTL_BASE		0x08
+#define BM1000_PCIE_GPR_GENCTL(x)		((x * 0x20) + BM1000_PCIE_GPR_GENCTL_BASE)
+#define BM1000_PCIE_LTSSM_ENABLE		BIT(1)
+#define BM1000_PCIE_DBI2_MODE			BIT(2)
+#define BM1000_PCIE_PHY_MGMT_ENABLE		BIT(3)
+
+#define BM1000_PCIE_GPR_MSI_TRANS_CTL2		0xf8
+#define BM1000_PCIE_MSI_TRANS_EN(x)		BIT(9 + (x))
+#define BM1000_PCIE_MSI_TRANS_RCNUM(x)		((x) << (2 * (x)))
+#define BM1000_PCIE_MSI_TRANS_RCNUM_MASK(x)	((3) << (2 * (x)))
+
+#define BM1000_PCIE0_DBI_BASE			0x02200000
+#define BM1000_PCIE1_DBI_BASE			0x02210000
+#define BM1000_PCIE2_DBI_BASE			0x02220000
+
+struct bm1000_pcie {
+	struct dw_pcie		*pci;
+	unsigned		num;
+	struct regmap		*gpr;
+	struct gpio_desc	*reset_gpio;
+	bool			reset_active_low;
+	char			reset_name[32];
+	bool			retrained;
+};
+
+void bm1000_pcie_phy_enable(struct dw_pcie *pci)
+{
+	struct bm1000_pcie *bm = dev_get_drvdata(pci->dev);
+	u32 reg;
+
+	regmap_read(bm->gpr, BM1000_PCIE_GPR_GENCTL(bm->num), &reg);
+	reg |= BM1000_PCIE_PHY_MGMT_ENABLE | BM1000_PCIE_DBI2_MODE;
+	regmap_write(bm->gpr, BM1000_PCIE_GPR_GENCTL(bm->num), reg);
+}
+
+void bm1000_pcie_phy_disable(struct dw_pcie *pci)
+{
+	struct bm1000_pcie *bm = dev_get_drvdata(pci->dev);
+	u32 reg;
+
+	regmap_read(bm->gpr, BM1000_PCIE_GPR_GENCTL(bm->num), &reg);
+	reg &= ~(BM1000_PCIE_PHY_MGMT_ENABLE | BM1000_PCIE_DBI2_MODE);
+	regmap_write(bm->gpr, BM1000_PCIE_GPR_GENCTL(bm->num), reg);
+}
+
+static int bm1000_get_resources(struct platform_device *pdev,
+				struct dw_pcie *pci,
+				const struct baikal_pcie_of_data *data)
+{
+	struct bm1000_pcie *bm = platform_get_drvdata(pdev);
+	struct device *dev = pci->dev;
+	struct resource *res;
+	int reset_gpio;
+	enum of_gpio_flags gpio_flags;
+
+	bm->pci = pci;
+	bm->gpr = syscon_regmap_lookup_by_compatible("baikal,bm1000-pcie-gpr");
+	if (IS_ERR(bm->gpr)) {
+		dev_err(dev, "failed to find PCIe GPR registers\n");
+		return PTR_ERR(bm->gpr);
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi");
+	if (!res) {
+		dev_err(dev, "failed to find \"dbi\" region\n");
+		return -EINVAL;
+	}
+
+	pci->dbi_base = devm_pci_remap_cfg_resource(dev, res);
+	if (IS_ERR(pci->dbi_base))
+		return PTR_ERR(pci->dbi_base);
+
+	if (res->start == BM1000_PCIE0_DBI_BASE) {
+		bm->num = 0;
+	} else if (res->start == BM1000_PCIE1_DBI_BASE) {
+		bm->num = 1;
+	} else if (res->start == BM1000_PCIE2_DBI_BASE) {
+		bm->num = 2;
+	} else {
+		dev_err(dev, "incorrect \"dbi\" base\n");
+		return -EINVAL;
+	}
+
+	reset_gpio = of_get_named_gpio_flags(dev->of_node, "reset-gpios", 0,
+					     &gpio_flags);
+	if (gpio_is_valid(reset_gpio)) {
+		bm->reset_gpio = gpio_to_desc(reset_gpio);
+		bm->reset_active_low = gpio_flags & OF_GPIO_ACTIVE_LOW;
+		snprintf(bm->reset_name, sizeof(bm->reset_name), "pcie%u-reset",
+			 bm->num);
+	} else {
+		bm->reset_gpio = NULL;
+	}
+
+	return 0;
+}
+
+static int bm1000_pcie_link_up(struct dw_pcie *pci)
+{
+	struct bm1000_pcie *bm = dev_get_drvdata(pci->dev);
+	u32 reg;
+
+	regmap_read(bm->gpr, BM1000_PCIE_GPR_GENCTL(bm->num), &reg);
+	if (!(reg & BM1000_PCIE_LTSSM_ENABLE))
+		return 0;
+
+	regmap_read(bm->gpr, BM1000_PCIE_GPR_STATUS(bm->num), &reg);
+	return (reg & BAIKAL_PCIE_LTSSM_MASK) == BAIKAL_PCIE_LTSSM_STATE_L0;
+}
+
+static int bm1000_pcie_start_link(struct dw_pcie *pci)
+{
+	struct bm1000_pcie *bm = dev_get_drvdata(pci->dev);
+	u32 reg;
+
+	regmap_read(bm->gpr, BM1000_PCIE_GPR_GENCTL(bm->num), &reg);
+	reg |= BM1000_PCIE_LTSSM_ENABLE;
+	regmap_write(bm->gpr, BM1000_PCIE_GPR_GENCTL(bm->num), reg);
+	return 0;
+}
+
+static void bm1000_pcie_link_speed_fixup(struct bm1000_pcie *bm,
+					 struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+	struct dw_pcie *pci = bm->pci;
+	unsigned dev_lnkcap_speed;
+	unsigned dev_lnkcap_width;
+	unsigned rc_lnkcap_speed;
+	unsigned rc_lnksta_speed;
+	unsigned rc_target_speed;
+	u16 exp_cap_off;
+	u32 reg;
+
+	/* Return if the bus has already been retrained */
+	if (bm->retrained)
+		return;
+
+	list_for_each_entry(dev, &bus->devices, bus_list)
+		if (dev->subordinate)
+			bm1000_pcie_link_speed_fixup(bm, dev->subordinate);
+
+	/* Skip root bridge and devices not directly attached to the RC */
+	if (pci_is_root_bus(bus) || !pci_is_root_bus(bus->parent))
+		return;
+
+	dev = list_first_entry_or_null(&bus->devices, struct pci_dev, bus_list);
+	if (dev == NULL)
+		return;
+
+	exp_cap_off = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+
+	reg = dw_pcie_readl_dbi(pci, exp_cap_off + PCI_EXP_LNKCAP);
+	rc_lnkcap_speed = FIELD_GET(PCI_EXP_LNKCAP_SLS, reg);
+
+	reg = dw_pcie_readw_dbi(pci, exp_cap_off + PCI_EXP_LNKSTA);
+	rc_lnksta_speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, reg);
+
+	if (acpi_disabled) {
+		pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &reg);
+	} else {
+		struct pci_config_window *cfg;
+		int where = pci_pcie_cap(dev) + PCI_EXP_LNKCAP;
+
+		cfg = dev->bus->sysdata;
+		if (dev->bus->number == cfg->busr.start) {
+			if (PCI_SLOT(dev->devfn) > 0)
+				reg = 0;
+			else
+				reg = readl(pci->dbi_base + where);
+		} else {
+			unsigned int busn = dev->bus->number - cfg->busr.start;
+			unsigned int devfn_shift = cfg->ops->bus_shift - 8;
+
+			reg = readl(cfg->win +
+				    (busn << cfg->ops->bus_shift) +
+				    (dev->devfn << devfn_shift) +
+				    where);
+		}
+	}
+
+	dev_lnkcap_speed = FIELD_GET(PCI_EXP_LNKCAP_SLS, reg);
+	dev_lnkcap_width = FIELD_GET(PCI_EXP_LNKCAP_MLW, reg);
+
+	baikal_pcie_link_print_status(pci);
+
+	/*
+	 * [2.5 -> 8.0 GT/s] is suitable way of retraining.
+	 * [2.5 -> 5.0 GT/s] is used when 8.0 GT/s could not be reached.
+	 * [5.0 -> 8.0 GT/s] causes system freezing sometimes.
+	 */
+	if (rc_lnkcap_speed < dev_lnkcap_speed)
+		rc_target_speed = rc_lnkcap_speed;
+	else
+		rc_target_speed = dev_lnkcap_speed;
+
+	while (rc_lnksta_speed < rc_target_speed) {
+		unsigned long start_jiffies;
+
+		/* Try to change link speed */
+		dev_info(pci->dev, "retrain link to %s GT/s\n",
+			 baikal_pcie_link_speed_str(rc_target_speed));
+
+		/* If link is already training wait for training to complete */
+		baikal_pcie_link_wait_training_done(pci);
+
+		/* Set desired speed */
+		reg = dw_pcie_readw_dbi(pci, exp_cap_off + PCI_EXP_LNKCTL2);
+		reg &= ~PCI_EXP_LNKCTL2_TLS;
+		reg |= rc_target_speed;
+		dw_pcie_writew_dbi(pci, exp_cap_off + PCI_EXP_LNKCTL2, reg);
+
+		/* Deassert and assert PORT_LOGIC_SPEED_CHANGE bit */
+		reg = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+		reg &= ~PORT_LOGIC_SPEED_CHANGE;
+		dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, reg);
+		reg |=	PORT_LOGIC_SPEED_CHANGE;
+		dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, reg);
+
+		/* Wait for link training */
+		start_jiffies = jiffies;
+		for (;;) {
+			/* Wait for link training begin */
+			if (!baikal_pcie_link_is_training(pci)) {
+				if (time_after(jiffies, start_jiffies + HZ)) {
+					dev_err(pci->dev,
+					     "link training has not started\n");
+					/*
+					 * Don't wait for training_done()
+					 * if it hasn't started.
+					 */
+					break;
+				}
+
+				udelay(100);
+				continue;
+			}
+
+			/* Wait for link training end */
+			if (!baikal_pcie_link_wait_training_done(pci))
+				break;
+
+			if (!dw_pcie_wait_for_link(pci)) {
+				/* Wait if link switched to config/recovery */
+				baikal_pcie_link_wait_training_done(pci);
+				baikal_pcie_link_print_status(pci);
+			}
+
+			break;
+		}
+
+		/* Check if the link is down after retrain */
+		if (!bm1000_pcie_link_up(pci)) {
+			/*
+			 * Check if the link has already been down and
+			 * the link is unable to re-establish at 2.5 GT/s
+			 */
+			if (rc_lnksta_speed == 0 &&
+			    rc_target_speed == PCI_EXP_LNKCTL2_TLS_2_5GT)
+				break;
+
+			rc_lnksta_speed = 0;
+			if (rc_target_speed > PCI_EXP_LNKCTL2_TLS_2_5GT) {
+				/* Try to use lower speed */
+				--rc_target_speed;
+			}
+
+			continue;
+		}
+
+		reg = dw_pcie_readw_dbi(pci, exp_cap_off + PCI_EXP_LNKSTA);
+		rc_lnksta_speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, reg);
+
+		/* Check if the targeted speed has not been reached */
+		if (rc_lnksta_speed < rc_target_speed &&
+		    rc_target_speed > PCI_EXP_LNKCTL2_TLS_2_5GT) {
+			/* Try to use lower speed */
+			--rc_target_speed;
+		}
+	}
+
+	bm->retrained = true;
+}
+
+static void bm1000_pcie_setup_rc_acpi(struct pcie_port *pp);
+
+static u16 bm1000_pcie_get_segment_acpi(struct device *dev);
+
+static int bm1000_pcie_host_init(struct pcie_port *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct bm1000_pcie *bm = dev_get_drvdata(pci->dev);
+	struct device *dev = pci->dev;
+	int err;
+	int linkup;
+	unsigned idx;
+	u16 exp_cap_off;
+	u16 ext_cap_err_off;
+	u32 reg;
+
+	/* Disable access to PHY registers and DBI2 mode */
+	bm1000_pcie_phy_disable(pci);
+
+	bm->retrained = false;
+	linkup = bm1000_pcie_link_up(pci);
+
+	/* If link is not established yet, reset the RC */
+	if (!linkup) {
+		/* Disable link training */
+		regmap_read(bm->gpr, BM1000_PCIE_GPR_GENCTL(bm->num), &reg);
+		reg &= ~BM1000_PCIE_LTSSM_ENABLE;
+		regmap_write(bm->gpr, BM1000_PCIE_GPR_GENCTL(bm->num), reg);
+
+		/* Assert PERST pin */
+		if (bm->reset_gpio != NULL) {
+			unsigned long gpio_flags;
+
+			if (bm->reset_active_low) {
+				gpio_flags = GPIOF_ACTIVE_LOW |
+					     GPIOF_OUT_INIT_LOW;
+			} else {
+				gpio_flags = GPIOF_OUT_INIT_HIGH;
+			}
+
+			err = devm_gpio_request_one(dev,
+						   desc_to_gpio(bm->reset_gpio),
+						   gpio_flags, bm->reset_name);
+			if (err) {
+				dev_err(dev, "request GPIO failed (%d)\n", err);
+				return err;
+			}
+		}
+
+		/* Reset the RC */
+		regmap_read(bm->gpr, BM1000_PCIE_GPR_RESET(bm->num), &reg);
+		reg |= BM1000_PCIE_NONSTICKY_RST |
+		       BM1000_PCIE_STICKY_RST	 |
+		       BM1000_PCIE_PWR_RST	 |
+		       BM1000_PCIE_CORE_RST	 |
+		       BM1000_PCIE_PHY_RST;
+
+		/* If the RC is PCIe x8, reset PIPE0 and PIPE1 */
+		if (bm->num == 2) {
+			reg |= BM1000_PCIE_PIPE0_RST |
+			       BM1000_PCIE_PIPE1_RST;
+		} else {
+			reg |= BM1000_PCIE_PIPE_RST;
+		}
+
+		regmap_write(bm->gpr, BM1000_PCIE_GPR_RESET(bm->num), reg);
+		usleep_range(20000, 30000);
+
+		if (bm->reset_gpio != NULL) {
+			/* Deassert PERST pin */
+			gpiod_set_value_cansleep(bm->reset_gpio, 0);
+		}
+
+		/* Deassert PHY reset */
+		regmap_read(bm->gpr, BM1000_PCIE_GPR_RESET(bm->num), &reg);
+		reg &= ~BM1000_PCIE_PHY_RST;
+		regmap_write(bm->gpr, BM1000_PCIE_GPR_RESET(bm->num), reg);
+
+		/* Deassert all software controlled resets */
+		regmap_read(bm->gpr, BM1000_PCIE_GPR_RESET(bm->num), &reg);
+		reg &= ~(BM1000_PCIE_ADB_PWRDWN	   |
+			 BM1000_PCIE_HOT_RST	   |
+			 BM1000_PCIE_NONSTICKY_RST |
+			 BM1000_PCIE_STICKY_RST	   |
+			 BM1000_PCIE_PWR_RST	   |
+			 BM1000_PCIE_CORE_RST	   |
+			 BM1000_PCIE_PHY_RST);
+
+		if (bm->num == 2) {
+			reg &= ~(BM1000_PCIE_PIPE0_RST |
+				 BM1000_PCIE_PIPE1_RST);
+		} else {
+			reg &= ~BM1000_PCIE_PIPE_RST;
+		}
+
+		regmap_write(bm->gpr, BM1000_PCIE_GPR_RESET(bm->num), reg);
+	}
+
+	/* Deinitialize all outbound iATU regions */
+	for (idx = 0; idx < pci->num_viewport; ++idx)
+		dw_pcie_disable_atu(pci, idx, DW_PCIE_REGION_OUTBOUND);
+
+	if (acpi_disabled)
+		dw_pcie_setup_rc(pp);
+	else
+		bm1000_pcie_setup_rc_acpi(pp);
+
+	/* Enable error reporting */
+	ext_cap_err_off = dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_ERR);
+	reg = dw_pcie_readl_dbi(pci, ext_cap_err_off + PCI_ERR_ROOT_COMMAND);
+	reg |= PCI_ERR_ROOT_CMD_COR_EN	    |
+	       PCI_ERR_ROOT_CMD_NONFATAL_EN |
+	       PCI_ERR_ROOT_CMD_FATAL_EN;
+	dw_pcie_writel_dbi(pci, ext_cap_err_off + PCI_ERR_ROOT_COMMAND, reg);
+
+	exp_cap_off = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	reg = dw_pcie_readw_dbi(pci, exp_cap_off + PCI_EXP_DEVCTL);
+	reg |= PCI_EXP_DEVCTL_CERE  |
+	       PCI_EXP_DEVCTL_NFERE |
+	       PCI_EXP_DEVCTL_FERE  |
+	       PCI_EXP_DEVCTL_URRE;
+	dw_pcie_writew_dbi(pci, exp_cap_off + PCI_EXP_DEVCTL, reg);
+
+	reg = dw_pcie_readl_dbi(pci, exp_cap_off + PCI_EXP_RTCTL);
+	reg |= PCI_EXP_RTCTL_SECEE  |
+	       PCI_EXP_RTCTL_SENFEE |
+	       PCI_EXP_RTCTL_SEFEE  |
+	       PCI_EXP_RTCTL_PMEIE;
+	dw_pcie_writel_dbi(pci, exp_cap_off + PCI_EXP_RTCTL, reg);
+
+	if (linkup) {
+		dev_info(dev, "link is already up\n");
+	} else {
+		/* Use 2.5 GT/s rate for link establishing */
+		reg = dw_pcie_readw_dbi(pci, exp_cap_off + PCI_EXP_LNKCTL2);
+		reg &= ~PCI_EXP_LNKCTL2_TLS;
+		reg |=	PCI_EXP_LNKCTL2_TLS_2_5GT;
+		dw_pcie_writew_dbi(pci, exp_cap_off + PCI_EXP_LNKCTL2, reg);
+
+		/*
+		 * Clear PORT_LOGIC_SPEED_CHANGE bit. It has been set by
+		 * dw_pcie_setup_rc(pp). This bit causes link retraining. But
+		 * link retraining should be performed later by calling the
+		 * bm1000_pcie_link_speed_fixup().
+		 */
+		reg = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+		reg &= ~PORT_LOGIC_SPEED_CHANGE;
+		dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, reg);
+
+		/* Establish link */
+		regmap_read(bm->gpr, BM1000_PCIE_GPR_GENCTL(bm->num), &reg);
+		reg |= BM1000_PCIE_LTSSM_ENABLE;
+		regmap_write(bm->gpr, BM1000_PCIE_GPR_GENCTL(bm->num), reg);
+
+		dw_pcie_wait_for_link(pci);
+	}
+
+	regmap_read(bm->gpr, BM1000_PCIE_GPR_MSI_TRANS_CTL2, &reg);
+	reg &= ~BM1000_PCIE_MSI_TRANS_RCNUM_MASK(bm->num);
+	if (acpi_disabled)
+		reg |= BM1000_PCIE_MSI_TRANS_RCNUM(bm->num);
+	else
+		reg |= (bm1000_pcie_get_segment_acpi(dev) + 1) << (2 * bm->num);
+
+	reg |= BM1000_PCIE_MSI_TRANS_EN(bm->num);
+	regmap_write(bm->gpr, BM1000_PCIE_GPR_MSI_TRANS_CTL2, reg);
+
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		dw_pcie_msi_init(pp);
+
+	/* RX/TX equalizers fine tune */
+	bm1000_pcie_tune(pci);
+
+	return 0;
+}
+
+static void bm1000_pcie_set_msi_vec_num(struct pcie_port *pp)
+{
+	pp->num_vectors = MAX_MSI_IRQS;
+}
+
+static int bm1000_pcie_msi_host_init(struct pcie_port *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct device *dev = pci->dev;
+	struct device_node *np = dev->of_node;
+	struct device_node *msi_node;
+
+	/*
+	 * The MSI domain is set by the generic of_msi_configure(). This
+	 * .msi_host_init() function keeps us from doing the default MSI
+	 * domain setup in dw_pcie_host_init() and also enforces the
+	 * requirement that "msi-parent" exists.
+	 */
+	msi_node = of_parse_phandle(np, "msi-parent", 0);
+	if (!msi_node) {
+		dev_err(dev, "failed to find msi-parent\n");
+		return -EINVAL;
+	}
+
+	of_node_put(msi_node);
+	return 0;
+}
+
+static irqreturn_t bm1000_pcie_aer_irq_handler(int irq, void *arg)
+{
+	struct bm1000_pcie *bm = arg;
+	struct dw_pcie *pci = bm->pci;
+	struct device *dev = pci->dev;
+	u16 exp_cap_off;
+	u16 ext_cap_err_off;
+	u16 dev_sta;
+	u32 cor_err;
+	u32 root_err;
+	u32 uncor_err;
+
+	exp_cap_off	= dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	ext_cap_err_off = dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_ERR);
+
+	uncor_err	= dw_pcie_readl_dbi(pci,
+					ext_cap_err_off + PCI_ERR_UNCOR_STATUS);
+	cor_err		= dw_pcie_readl_dbi(pci,
+					ext_cap_err_off + PCI_ERR_COR_STATUS);
+	root_err	= dw_pcie_readl_dbi(pci,
+					ext_cap_err_off + PCI_ERR_ROOT_STATUS);
+	dev_sta		= dw_pcie_readw_dbi(pci,
+					exp_cap_off + PCI_EXP_DEVSTA);
+
+	dw_pcie_writel_dbi(pci,
+			   ext_cap_err_off + PCI_ERR_UNCOR_STATUS, uncor_err);
+	dw_pcie_writel_dbi(pci,
+			   ext_cap_err_off + PCI_ERR_COR_STATUS, cor_err);
+	dw_pcie_writel_dbi(pci,
+			   ext_cap_err_off + PCI_ERR_ROOT_STATUS, root_err);
+	dw_pcie_writew_dbi(pci,
+			   exp_cap_off + PCI_EXP_DEVSTA, dev_sta);
+
+	dev_err(dev,
+		"DevSta:0x%04x RootErr:0x%x UncorErr:0x%x CorErr:0x%x\n",
+		dev_sta, root_err, uncor_err, cor_err);
+
+	return IRQ_HANDLED;
+}
+
+static const struct dw_pcie_host_ops bm1000_pcie_host_ops = {
+	.host_init	 = bm1000_pcie_host_init,
+	.msi_host_init	 = bm1000_pcie_msi_host_init,
+	.set_num_vectors = bm1000_pcie_set_msi_vec_num,
+};
+
+static int bm1000_add_pcie_port(struct platform_device *pdev)
+{
+	struct bm1000_pcie *bm = platform_get_drvdata(pdev);
+	struct device *dev = &pdev->dev;
+	struct dw_pcie *pci = bm->pci;
+	struct pcie_port *pp = &pci->pp;
+	int ret;
+
+	pp->irq = platform_get_irq_byname(pdev, "aer");
+	if (pp->irq < 0) {
+		dev_err(dev, "failed to get \"aer\" IRQ\n");
+		return pp->irq;
+	}
+
+	ret = devm_request_irq(dev, pp->irq, bm1000_pcie_aer_irq_handler,
+			       IRQF_SHARED, "bm1000-pcie-aer", bm);
+	if (ret) {
+		dev_err(dev, "failed to request IRQ %d\n", pp->irq);
+		return ret;
+	}
+
+	if (IS_ENABLED(CONFIG_PCI_MSI)) {
+		pp->msi_irq = platform_get_irq_byname(pdev, "msi");
+		if (pp->msi_irq < 0) {
+			dev_err(dev, "failed to get \"msi\" IRQ\n");
+			return pp->msi_irq;
+		}
+	}
+
+	pp->ops = &bm1000_pcie_host_ops;
+	ret = dw_pcie_host_init(pp);
+	if (ret) {
+		dev_err(dev, "failed to initialize host\n");
+		return ret;
+	}
+
+	bm1000_pcie_link_speed_fixup(bm, pp->root_bus);
+	return 0;
+}
+
+static const struct baikal_pcie_of_data bm1000_pcie_rc_of_data = {
+	.mode = DW_PCIE_RC_TYPE,
+	.dw_pcie_ops = {
+		.link_up	= bm1000_pcie_link_up,
+		.start_link	= bm1000_pcie_start_link,
+	},
+	.get_resources = bm1000_get_resources,
+};
+
+static const struct of_device_id bm1000_pcie_of_match[] = {
+	{
+		.compatible = "baikal,bm1000-pcie",
+		.data = &bm1000_pcie_rc_of_data,
+	},
+	{ },
+};
+
+static const struct dw_pcie_ops bm1000_dw_pcie_ops = {
+	.link_up = bm1000_pcie_link_up,
+};
+
+static int bm1000_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct bm1000_pcie *bm;
+	struct dw_pcie *pci;
+	int ret;
+	enum of_gpio_flags gpio_flags;
+	int reset_gpio;
+	struct resource *res;
+
+	pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+	if (!pci)
+		return -ENOMEM;
+
+	pci->dev = dev;
+	pci->ops = &bm1000_dw_pcie_ops;
+
+	bm = devm_kzalloc(dev, sizeof(*bm), GFP_KERNEL);
+	if (!bm)
+		return -ENOMEM;
+
+	bm->pci = pci;
+	bm->gpr = syscon_regmap_lookup_by_compatible("baikal,bm1000-pcie-gpr");
+	if (IS_ERR(bm->gpr)) {
+		dev_err(dev, "failed to find PCIe GPR registers\n");
+		return PTR_ERR(bm->gpr);
+	}
+
+	platform_set_drvdata(pdev, bm);
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi");
+	if (!res) {
+		dev_err(dev, "failed to find \"dbi\" region\n");
+		return -EINVAL;
+	}
+
+	devm_request_resource(dev, &iomem_resource, res);
+	pci->dbi_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pci->dbi_base)) {
+		dev_err(dev, "error with ioremap\n");
+		return PTR_ERR(pci->dbi_base);
+	}
+
+	if (res->start == BM1000_PCIE0_DBI_BASE) {
+		bm->num = 0;
+	} else if (res->start == BM1000_PCIE1_DBI_BASE) {
+		bm->num = 1;
+	} else if (res->start == BM1000_PCIE2_DBI_BASE) {
+		bm->num = 2;
+	} else {
+		dev_err(dev, "incorrect \"dbi\" base\n");
+		return -EINVAL;
+	}
+
+	reset_gpio = of_get_named_gpio_flags(dev->of_node, "reset-gpios", 0,
+					     &gpio_flags);
+	if (gpio_is_valid(reset_gpio)) {
+		bm->reset_gpio = gpio_to_desc(reset_gpio);
+		bm->reset_active_low = !!(gpio_flags & OF_GPIO_ACTIVE_LOW);
+		snprintf(bm->reset_name, sizeof(bm->reset_name), "pcie%u-reset",
+			 bm->num);
+	} else {
+		bm->reset_gpio = NULL;
+	}
+
+	ret = bm1000_add_pcie_port(pdev);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static struct platform_driver bm1000_pcie_driver = {
+	.driver = {
+		.name = "baikal-bm1000-pcie",
+		.of_match_table = bm1000_pcie_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = bm1000_pcie_probe,
+};
+builtin_platform_driver(bm1000_pcie_driver);
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
+static u16 bm1000_pcie_get_segment_acpi(struct device *dev)
+{
+	struct acpi_pci_root *root = acpi_driver_data(to_acpi_device(dev));
+	return root->segment;
+}
+
+static void bm1000_pcie_prog_outbound_atu(struct dw_pcie *pci, int index,
+					  int type, u64 cpu_addr, u64 pci_addr,
+					  u32 size, u32 flags)
+{
+	u32 retries, val;
+
+	dw_pcie_writel_dbi(pci, PCIE_ATU_VIEWPORT,
+			   PCIE_ATU_REGION_OUTBOUND | index);
+	dw_pcie_writel_dbi(pci, PCIE_ATU_LOWER_BASE,
+			   lower_32_bits(cpu_addr));
+	dw_pcie_writel_dbi(pci, PCIE_ATU_UPPER_BASE,
+			   upper_32_bits(cpu_addr));
+	dw_pcie_writel_dbi(pci, PCIE_ATU_LIMIT,
+			   lower_32_bits(cpu_addr + size - 1));
+	dw_pcie_writel_dbi(pci, PCIE_ATU_LOWER_TARGET,
+			   lower_32_bits(pci_addr));
+	dw_pcie_writel_dbi(pci, PCIE_ATU_UPPER_TARGET,
+			   upper_32_bits(pci_addr));
+	dw_pcie_writel_dbi(pci, PCIE_ATU_CR1, type);
+	dw_pcie_writel_dbi(pci, PCIE_ATU_CR2, PCIE_ATU_ENABLE | flags);
+
+	/*
+	 * Make sure ATU enable takes effect before any subsequent config
+	 * and I/O accesses.
+	 */
+	for (retries = 0; retries < LINK_WAIT_MAX_IATU_RETRIES; ++retries) {
+		val = dw_pcie_readl_dbi(pci, PCIE_ATU_CR2);
+		if (val & PCIE_ATU_ENABLE)
+			return;
+
+		mdelay(LINK_WAIT_IATU);
+	}
+	dev_err(pci->dev, "Outbound iATU is not being enabled\n");
+}
+
+#define PCIE_IATU_REGION_CTRL_2_REG_SHIFT_MODE	BIT(28)
+
+static void bm1000_pcie_setup_rc_acpi(struct pcie_port *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	acpi_status status;
+	u64 lanes;
+	u32 val;
+
+	/*
+	 * Enable DBI read-only registers for writing/updating configuration.
+	 * Write permission gets disabled towards the end of this function.
+	 */
+	dw_pcie_dbi_ro_wr_en(pci);
+
+	status = acpi_evaluate_integer(to_acpi_device(pci->dev)->handle,
+				       "NUML", NULL, &lanes);
+	if (ACPI_FAILURE(status)) {
+		dev_dbg(pci->dev, "failed to get num-lanes\n");
+	} else {
+		/* Set the number of lanes */
+		val = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL);
+		val &= ~PORT_LINK_MODE_MASK;
+		switch (lanes) {
+		case 1:
+			val |= PORT_LINK_MODE_1_LANES;
+			break;
+		case 2:
+			val |= PORT_LINK_MODE_2_LANES;
+			break;
+		case 4:
+			val |= PORT_LINK_MODE_4_LANES;
+			break;
+		case 8:
+			val |= PORT_LINK_MODE_8_LANES;
+			break;
+		default:
+			dev_err(pci->dev, "NUML %llu: invalid value\n", lanes);
+			goto skip_lanes;
+		}
+		dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val);
+
+		/* Set link width speed control register */
+		val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+		val &= ~PORT_LOGIC_LINK_WIDTH_MASK;
+		switch (lanes) {
+		case 1:
+			val |= PORT_LOGIC_LINK_WIDTH_1_LANES;
+			break;
+		case 2:
+			val |= PORT_LOGIC_LINK_WIDTH_2_LANES;
+			break;
+		case 4:
+			val |= PORT_LOGIC_LINK_WIDTH_4_LANES;
+			break;
+		case 8:
+			val |= PORT_LOGIC_LINK_WIDTH_8_LANES;
+			break;
+		}
+		dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
+	}
+
+skip_lanes:
+	/* Setup RC BARs */
+	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, 0x00000004);
+	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_1, 0x00000000);
+
+	/* Setup interrupt pins */
+	val = dw_pcie_readl_dbi(pci, PCI_INTERRUPT_LINE);
+	val &= 0xffff00ff;
+	val |= 0x00000100;
+	dw_pcie_writel_dbi(pci, PCI_INTERRUPT_LINE, val);
+
+	/* Setup bus numbers */
+	val = dw_pcie_readl_dbi(pci, PCI_PRIMARY_BUS);
+	val &= 0xff000000;
+	val |= 0x00ff0100;
+	dw_pcie_writel_dbi(pci, PCI_PRIMARY_BUS, val);
+
+	/* Setup command register */
+	val = dw_pcie_readl_dbi(pci, PCI_COMMAND);
+	val &= 0xffff0000;
+	val |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
+		PCI_COMMAND_MASTER | PCI_COMMAND_SERR;
+	dw_pcie_writel_dbi(pci, PCI_COMMAND, val);
+
+	/* Program ATU */
+	bm1000_pcie_prog_outbound_atu(pci, 0, PCIE_ATU_TYPE_CFG0,
+				      pp->cfg0_base, 0,
+				      pp->cfg0_size,
+				      PCIE_IATU_REGION_CTRL_2_REG_SHIFT_MODE);
+	bm1000_pcie_prog_outbound_atu(pci, 1, PCIE_ATU_TYPE_CFG1,
+				      pp->cfg1_base, 0,
+				      pp->cfg1_size,
+				      PCIE_IATU_REGION_CTRL_2_REG_SHIFT_MODE);
+	bm1000_pcie_prog_outbound_atu(pci, 2, PCIE_ATU_TYPE_MEM,
+				      pp->mem_base, pp->mem_bus_addr,
+				      pp->mem_size, 0);
+	bm1000_pcie_prog_outbound_atu(pci, 3, PCIE_ATU_TYPE_IO,
+				      pp->io_base, pp->io_bus_addr,
+				      pp->io_size, 0);
+
+	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, 0);
+
+	/* Program correct class for RC */
+	dw_pcie_writew_dbi(pci, PCI_CLASS_DEVICE, PCI_CLASS_BRIDGE_PCI);
+
+	val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+	val |= PORT_LOGIC_SPEED_CHANGE;
+	dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
+
+	dw_pcie_dbi_ro_wr_dis(pci);
+}
+
+static int bm1000_pcie_get_res_acpi(struct acpi_device *adev,
+				    struct acpi_device **res_dev,
+				    struct bm1000_pcie *bm)
+{
+	struct device *dev = &adev->dev;
+	struct pcie_port *pp = &bm->pci->pp;
+	struct resource_entry *entry;
+	struct list_head list, *pos;
+	struct fwnode_handle *fwnode;
+	int ret;
+	unsigned long flags = IORESOURCE_MEM;
+
+	fwnode = fwnode_get_named_child_node(&adev->fwnode, "RES0");
+	if (!fwnode) {
+		dev_err(dev, "failed to get RES0 subdevice\n");
+		return -EINVAL;
+	}
+
+	*res_dev = to_acpi_device_node(fwnode);
+	if (!*res_dev) {
+		dev_err(dev, "RES0 is not an acpi device node\n");
+		return -EINVAL;
+	}
+
+	INIT_LIST_HEAD(&list);
+	ret = acpi_dev_get_resources(*res_dev, &list,
+				     acpi_dev_filter_resource_type_cb,
+				     (void *)flags);
+	if (ret < 0) {
+		dev_err(dev, "failed to parse RES0._CRS method, error code %d\n", ret);
+		return ret;
+	}
+
+	if (ret != 2) {
+		dev_err(dev, "invalid number of MEM resources present in RES0._CRS (%i, need 2)\n", ret);
+		return -EINVAL;
+	}
+
+	/* ECAM */
+	pos = list.next;
+	entry = list_entry(pos, struct resource_entry, node);
+	pp->cfg0_size = resource_size(entry->res);
+	pp->cfg1_size = pp->cfg0_size;
+	pp->cfg0_base = entry->res->start;
+	pp->cfg1_base = pp->cfg0_base;
+
+	/* DBI */
+	pos = pos->next;
+	entry = list_entry(pos, struct resource_entry, node);
+	if (entry->res->start == BM1000_PCIE0_DBI_BASE) {
+		bm->num = 0;
+	} else if (entry->res->start == BM1000_PCIE1_DBI_BASE) {
+		bm->num = 1;
+	} else if (entry->res->start == BM1000_PCIE2_DBI_BASE) {
+		bm->num = 2;
+	} else {
+		dev_err(dev, "incorrect \"dbi\" base\n");
+		return -EINVAL;
+	}
+	bm->pci->dbi_base = devm_ioremap_resource(dev, entry->res);
+	if (IS_ERR(bm->pci->dbi_base)) {
+		dev_err(dev, "error with dbi ioremap\n");
+		ret = PTR_ERR(bm->pci->dbi_base);
+		return ret;
+	}
+
+	acpi_dev_free_resource_list(&list);
+
+	/* Non-prefetchable memory */
+	INIT_LIST_HEAD(&list);
+	flags = IORESOURCE_MEM;
+	ret = acpi_dev_get_resources(adev, &list,
+				     acpi_dev_filter_resource_type_cb,
+				     (void *)flags);
+	if (ret < 0) {
+		dev_err(dev, "failed to parse _CRS method, error code %d\n", ret);
+		return ret;
+	}
+
+	if (ret != 1) {
+		dev_err(dev, "invalid number of MEM resources present in _CRS (%i, need 1)\n", ret);
+		return -EINVAL;
+	}
+
+	pos = list.next;
+	entry = list_entry(pos, struct resource_entry, node);
+	pp->mem_base = entry->res->start;
+	pp->mem_size = resource_size(entry->res);
+	pp->mem_bus_addr = entry->res->start - entry->offset;
+
+	acpi_dev_free_resource_list(&list);
+
+	/* I/O */
+	INIT_LIST_HEAD(&list);
+	flags = IORESOURCE_IO;
+	ret = acpi_dev_get_resources(adev, &list,
+				     acpi_dev_filter_resource_type_cb,
+				     (void *)flags);
+	if (ret < 0) {
+		dev_err(dev, "failed to parse _CRS method, error code %d\n", ret);
+		return ret;
+	}
+
+	if (ret != 1) {
+		dev_err(dev, "invalid number of IO resources present in _CRS (%i, need 1)\n", ret);
+		return -EINVAL;
+	}
+
+	pos = list.next;
+	entry = list_entry(pos, struct resource_entry, node);
+	pp->io_base = entry->res->start;
+	pp->io_size = resource_size(entry->res);
+	pp->io_bus_addr = entry->res->start - entry->offset;
+
+	acpi_dev_free_resource_list(&list);
+	return 0;
+}
+
+static int bm1000_pcie_get_irq_acpi(struct device *dev,
+				    struct acpi_device *res_dev,
+				    struct bm1000_pcie *bm)
+{
+	struct pcie_port *pp = &bm->pci->pp;
+	struct resource res;
+	int ret;
+
+	memset(&res, 0, sizeof(res));
+
+	ret = acpi_irq_get(res_dev->handle, 0, &res);
+	if (ret) {
+		dev_err(dev, "failed to get irq %d\n", 0);
+		return ret;
+	}
+
+	if (res.flags & IORESOURCE_BITS) {
+		struct irq_data *irqd;
+
+		irqd = irq_get_irq_data(res.start);
+		if (!irqd)
+			return -ENXIO;
+
+		irqd_set_trigger_type(irqd, res.flags & IORESOURCE_BITS);
+	}
+
+	pp->irq = res.start;
+
+	ret = devm_request_irq(dev, pp->irq, bm1000_pcie_aer_irq_handler,
+			       IRQF_SHARED, "bm1000-pcie-aer", bm);
+	if (ret) {
+		dev_err(dev, "failed to request irq %d\n", pp->irq);
+		return ret;
+	}
+
+	return 0;
+}
+
+static struct regmap *bm1000_regmap;
+
+static const struct regmap_config bm1000_pcie_syscon_regmap_config = {
+	.reg_bits = 32,
+	.val_bits = 32,
+	.reg_stride = 4
+};
+
+static struct regmap *bm1000_pcie_get_gpr_acpi(struct bm1000_pcie *bm)
+{
+	struct device *dev;
+	struct acpi_device *adev;
+	acpi_handle handle;
+	acpi_status status = AE_OK;
+	struct list_head list, *pos;
+	struct resource *res;
+	void __iomem *base;
+	struct regmap *regmap = NULL;
+	struct regmap_config config = bm1000_pcie_syscon_regmap_config;
+	unsigned long flags = IORESOURCE_MEM;
+	int ret;
+
+	if (bm1000_regmap)
+		return bm1000_regmap;
+
+	status = acpi_get_handle(NULL, "\\_SB.PGPR", &handle);
+	if (ACPI_FAILURE(status)) {
+		dev_err(dev, "failed to get PCIe GPR device\n");
+		return ERR_PTR(-ENODEV);
+	}
+
+	ret = acpi_bus_get_device(handle, &adev);
+	if (ret) {
+		dev_err(dev, "failed to process PCIe GPR handle\n");
+		return ERR_PTR(ret);
+	}
+
+	dev = &adev->dev;
+	INIT_LIST_HEAD(&list);
+	ret = acpi_dev_get_resources(adev, &list,
+				     acpi_dev_filter_resource_type_cb,
+				     (void *)flags);
+	if (ret < 0) {
+		dev_err(dev, "failed to parse _CRS method, error code %d\n", ret);
+		return NULL;
+	}
+
+	if (ret != 1) {
+		dev_err(dev, "invalid number of MEM resources present in _CRS (%i, need 1)\n", ret);
+		goto ret;
+	}
+
+	pos = list.next;
+	res = list_entry(pos, struct resource_entry, node)->res;
+
+	base = devm_ioremap(dev, res->start, resource_size(res));
+	if (!base) {
+		dev_err(dev, "error with ioremap\n");
+		goto ret;
+	}
+
+	config.max_register = resource_size(res) - 4;
+
+	regmap = devm_regmap_init_mmio(dev, base, &config);
+	if (IS_ERR(regmap)) {
+		dev_err(dev, "regmap init failed\n");
+		devm_iounmap(dev, base);
+		goto ret;
+	}
+
+	dev_dbg(dev, "regmap %pR registered\n", res);
+
+	bm1000_regmap = regmap;
+
+ret:
+	acpi_dev_free_resource_list(&list);
+	return regmap;
+}
+
+static int bm1000_get_acpi_data(struct device *dev, struct bm1000_pcie *bm)
+{
+	struct acpi_device *adev = to_acpi_device(dev), *res_dev;
+	acpi_status status = AE_OK;
+	u64 num_viewport;
+	int ret;
+
+	bm->gpr = bm1000_pcie_get_gpr_acpi(bm);
+	if (IS_ERR_OR_NULL(bm->gpr)) {
+		dev_err(dev, "No PCIe GPR specified\n");
+		return -EINVAL;
+	}
+
+	/* TODO: GPIO */
+
+	ret = bm1000_pcie_get_res_acpi(adev, &res_dev, bm);
+	if (ret) {
+		dev_err(dev, "failed to get resource info\n");
+		return ret;
+	}
+
+	ret = bm1000_pcie_get_irq_acpi(dev, res_dev, bm);
+	if (ret) {
+		dev_err(dev, "failed to get irq info\n");
+		return ret;
+	}
+
+	status = acpi_evaluate_integer(adev->handle, "NUMV", NULL,
+				       &num_viewport);
+	if (ACPI_FAILURE(status)) {
+		dev_err(dev, "failed to get num-viewport\n");
+		return -EINVAL;
+	}
+	bm->pci->num_viewport = num_viewport;
+
+	return 0;
+}
+
+static int bm1000_pcie_init(struct pci_config_window *cfg)
+{
+	struct device *dev = cfg->parent;
+	struct bm1000_pcie *bm;
+	struct dw_pcie *pci;
+	struct pcie_port *pp;
+	int ret;
+
+	pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+	if (!pci)
+		return -ENOMEM;
+
+	pci->dev = dev;
+	pci->ops = &bm1000_dw_pcie_ops;
+
+	bm = devm_kzalloc(dev, sizeof(*bm), GFP_KERNEL);
+	if (!bm)
+		return -ENOMEM;
+
+	cfg->priv = bm;
+	bm->pci = pci;
+	dev_set_drvdata(dev, bm);
+
+	ret = bm1000_get_acpi_data(dev, bm);
+	if (ret) {
+		dev_err(dev, "failed to get data from ACPI\n");
+		return ret;
+	}
+
+	pp = &pci->pp;
+	raw_spin_lock_init(&pp->lock);
+	pp->ops = &bm1000_pcie_host_ops;
+	pp->root_bus_nr = cfg->busr.start;
+	pp->busn = &cfg->busr;
+	pp->va_cfg0_base = devm_pci_remap_cfgspace(dev, pp->cfg0_base,
+						   pp->cfg0_size);
+	if (!pp->va_cfg0_base) {
+		dev_err(dev, "error with ioremap\n");
+		return -ENOMEM;
+	}
+	pp->va_cfg1_base = pp->va_cfg0_base;
+
+	ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64));
+	if (ret) {
+		dev_err(dev, "failed to enable DMA\n");
+		return ret;
+	}
+
+	ret = bm1000_pcie_host_init(pp);
+	if (ret) {
+		dev_err(dev, "Failed to initialize host\n");
+		return ret;
+	}
+
+	/* Link will be retrained by 'bm1000_pcie_map_bus()' */
+	bm->retrained = false;
+	return 0;
+}
+
+static void __iomem *bm1000_pcie_map_bus(struct pci_bus *bus,
+					 unsigned int devfn, int where)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	struct bm1000_pcie *bm = cfg->priv;
+	unsigned int devfn_shift = cfg->ops->bus_shift - 8;
+	unsigned int busn = bus->number;
+	void __iomem *base;
+
+	if (!bm->retrained) {
+		struct pci_bus **root_bus = &bm->pci->pp.root_bus;
+
+		if (*root_bus == NULL) {
+			*root_bus = bus;
+		} else if ((*root_bus)->is_added) {
+			struct acpi_device *adev = to_acpi_device(cfg->parent);
+			struct acpi_pci_root *root = adev->driver_data;
+
+			if (root->bus == *root_bus) {
+				pci_bus_add_devices(root->bus);
+				bm1000_pcie_link_speed_fixup(bm, root->bus);
+				bm->retrained = true;
+			}
+		}
+	}
+
+	if (bus->number != cfg->busr.start && !bm1000_pcie_link_up(bm->pci))
+		return NULL;
+
+	if (bus->number == cfg->busr.start) {
+		/*
+		 * The DW PCIe core doesn't filter out transactions to other
+		 * devices/functions on the root bus num, so we do this here.
+		 */
+		if (PCI_SLOT(devfn) > 0)
+			return NULL;
+		else
+			return bm->pci->dbi_base + where;
+	}
+
+	if (busn < cfg->busr.start || busn > cfg->busr.end)
+		return NULL;
+
+	busn -= cfg->busr.start;
+	base = cfg->win + (busn << cfg->ops->bus_shift);
+	return base + (devfn << devfn_shift) + where;
+}
+
+struct pci_ecam_ops baikal_m_pcie_ecam_ops = {
+	.bus_shift	= 20,
+	.init		= bm1000_pcie_init,
+	.pci_ops	= {
+		.map_bus	= bm1000_pcie_map_bus,
+		.read		= pci_generic_config_read,
+		.write		= pci_generic_config_write
+	}
+};
+#endif
+
+#define BS1000_PCIE_APB_PE_GEN_CTRL3			0x58
+#define BS1000_PCIE_APB_PE_GEN_CTRL3_LTSSM_EN		BIT(0)
+
+#define BS1000_PCIE_APB_PE_LINK_DBG2			0xb4
+#define BS1000_PCIE_APB_PE_LINK_DBG2_SMLH_LINK_UP	BIT(6)
+#define BS1000_PCIE_APB_PE_LINK_DBG2_RDLH_LINK_UP	BIT(7)
+
+#define BS1000_PCIE_APB_PE_ERR_STS			0xe0
+#define BS1000_PCIE_APB_PE_INT_STS			0xe8
+
+#define BS1000_PCIE0_P0_DBI_BASE			0x39000000
+#define BS1000_PCIE0_P1_DBI_BASE			0x39400000
+#define BS1000_PCIE1_P0_DBI_BASE			0x3d000000
+#define BS1000_PCIE1_P1_DBI_BASE			0x3d400000
+#define BS1000_PCIE2_P0_DBI_BASE			0x45000000
+#define BS1000_PCIE2_P1_DBI_BASE			0x45400000
+
+struct bs1000_pcie {
+	struct dw_pcie		*pci;
+	void __iomem		*apb_base;
+	u64			cpu_addr_mask;
+};
+
+static void bs1000_pcie_ep_init(struct dw_pcie_ep *ep)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	enum pci_barno bar;
+
+	for (bar = BAR_0; bar <= BAR_5; bar++)
+		dw_pcie_ep_reset_bar(pci, bar);
+}
+
+static int bs1000_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
+				    enum pci_epc_irq_type type,
+				    u16 interrupt_num)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+
+	switch (type) {
+	case PCI_EPC_IRQ_LEGACY:
+		return dw_pcie_ep_raise_legacy_irq(ep, func_no);
+	case PCI_EPC_IRQ_MSI:
+		return dw_pcie_ep_raise_msi_irq(ep, func_no, interrupt_num);
+	default:
+		dev_err(pci->dev, "UNKNOWN IRQ type\n");
+		return -EINVAL;
+	}
+}
+
+static const struct pci_epc_features bs1000_pcie_epc_features = {
+	.linkup_notifier = false,
+	.msi_capable = true,
+	.msix_capable = false,
+	.reserved_bar = BIT(BAR_3) | BIT(BAR_5),
+};
+
+static const struct pci_epc_features*
+bs1000_pcie_ep_get_features(struct dw_pcie_ep *ep)
+{
+	return &bs1000_pcie_epc_features;
+}
+
+static const struct dw_pcie_ep_ops bs1000_pcie_ep_ops = {
+	.ep_init	= bs1000_pcie_ep_init,
+	.raise_irq	= bs1000_pcie_ep_raise_irq,
+	.get_features	= bs1000_pcie_ep_get_features,
+};
+
+static int bs1000_get_resources(struct platform_device *pdev,
+				struct dw_pcie *pci,
+				const struct baikal_pcie_of_data *data)
+{
+	struct bs1000_pcie *bs = platform_get_drvdata(pdev);
+	struct device *dev = pci->dev;
+	struct resource *res;
+
+	bs->pci = pci;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi");
+	if (!res) {
+		dev_err(dev, "failed to find \"dbi\" region\n");
+		return -EINVAL;
+	}
+
+	pci->dbi_base = devm_pci_remap_cfg_resource(dev, res);
+	if (IS_ERR(pci->dbi_base))
+		return PTR_ERR(pci->dbi_base);
+
+	if (res->start == BS1000_PCIE0_P0_DBI_BASE ||
+	    res->start == BS1000_PCIE0_P1_DBI_BASE ||
+	    res->start == BS1000_PCIE1_P0_DBI_BASE ||
+	    res->start == BS1000_PCIE1_P1_DBI_BASE ||
+	    res->start == BS1000_PCIE2_P0_DBI_BASE ||
+	    res->start == BS1000_PCIE2_P1_DBI_BASE) {
+		bs->cpu_addr_mask = 0x7fffffffff;
+	} else {
+		bs->cpu_addr_mask = 0xffffffffff;
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "apb");
+	if (res) {
+		devm_request_resource(dev, &iomem_resource, res);
+		bs->apb_base = devm_ioremap_resource(dev, res);
+		if (IS_ERR(bs->apb_base))
+			return PTR_ERR(bs->apb_base);
+	} else {
+		dev_err(dev, "missing *apb* reg space\n");
+		return -EINVAL;
+	}
+
+	if (data->mode == DW_PCIE_EP_TYPE) {
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "addr_space");
+		if (!res)
+			return -EINVAL;
+
+		pci->ep.phys_base = res->start;
+		pci->ep.addr_size = resource_size(res);
+		pci->ep.ops = &bs1000_pcie_ep_ops;
+		pci->dbi_base2 = pci->dbi_base + 0x100000;
+	}
+
+	return 0;
+}
+
+static u64 bs1000_pcie_cpu_addr_fixup(struct dw_pcie *pci, u64 cpu_addr)
+{
+	struct bs1000_pcie *bs = dev_get_drvdata(pci->dev);
+
+	return cpu_addr & bs->cpu_addr_mask;
+}
+
+static int bs1000_pcie_link_up(struct dw_pcie *pci)
+{
+	struct bs1000_pcie *bs = dev_get_drvdata(pci->dev);
+	u32 reg;
+
+	reg = readl(bs->apb_base + BS1000_PCIE_APB_PE_LINK_DBG2);
+	return ((reg & BAIKAL_PCIE_LTSSM_MASK) == BAIKAL_PCIE_LTSSM_STATE_L0) &&
+		(reg & BS1000_PCIE_APB_PE_LINK_DBG2_SMLH_LINK_UP) &&
+		(reg & BS1000_PCIE_APB_PE_LINK_DBG2_RDLH_LINK_UP);
+}
+
+static int bs1000_pcie_start_link(struct dw_pcie *pci)
+{
+	struct bs1000_pcie *bs = dev_get_drvdata(pci->dev);
+	u32 reg;
+
+	reg = readl(bs->apb_base + BS1000_PCIE_APB_PE_GEN_CTRL3);
+	reg |= BS1000_PCIE_APB_PE_GEN_CTRL3_LTSSM_EN;
+	writel(reg, bs->apb_base + BS1000_PCIE_APB_PE_GEN_CTRL3);
+	return 0;
+}
+
+static void bs1000_pcie_setup_rc_acpi(struct pcie_port *pp);
+
+static int bs1000_pcie_host_init(struct pcie_port *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	unsigned idx;
+	u16 exp_cap_off;
+	u16 ext_cap_err_off;
+	u32 reg;
+
+	/* Deinitialize all outbound iATU regions */
+	for (idx = 0; idx < pci->num_viewport; ++idx)
+		dw_pcie_disable_atu(pci, idx, DW_PCIE_REGION_OUTBOUND);
+
+	if (acpi_disabled)
+		dw_pcie_setup_rc(pp);
+	else
+		bs1000_pcie_setup_rc_acpi(pp);
+
+	/* Enable error reporting */
+	ext_cap_err_off = dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_ERR);
+	reg = dw_pcie_readl_dbi(pci, ext_cap_err_off + PCI_ERR_ROOT_COMMAND);
+	reg |= PCI_ERR_ROOT_CMD_COR_EN	    |
+	       PCI_ERR_ROOT_CMD_NONFATAL_EN |
+	       PCI_ERR_ROOT_CMD_FATAL_EN;
+	dw_pcie_writel_dbi(pci, ext_cap_err_off + PCI_ERR_ROOT_COMMAND, reg);
+
+	exp_cap_off = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	reg = dw_pcie_readw_dbi(pci, exp_cap_off + PCI_EXP_DEVCTL);
+	reg |= PCI_EXP_DEVCTL_CERE  |
+	       PCI_EXP_DEVCTL_NFERE |
+	       PCI_EXP_DEVCTL_FERE  |
+	       PCI_EXP_DEVCTL_URRE;
+	dw_pcie_writew_dbi(pci, exp_cap_off + PCI_EXP_DEVCTL, reg);
+
+	reg = dw_pcie_readl_dbi(pci, exp_cap_off + PCI_EXP_RTCTL);
+	reg |= PCI_EXP_RTCTL_SECEE  |
+	       PCI_EXP_RTCTL_SENFEE |
+	       PCI_EXP_RTCTL_SEFEE  |
+	       PCI_EXP_RTCTL_PMEIE;
+	dw_pcie_writel_dbi(pci, exp_cap_off + PCI_EXP_RTCTL, reg);
+
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		dw_pcie_msi_init(pp);
+
+	return 0;
+}
+
+static irqreturn_t bs1000_pcie_intr_irq_handler(int irq, void *arg)
+{
+	struct bs1000_pcie *bs = arg;
+	struct dw_pcie *pci = bs->pci;
+	struct device *dev = pci->dev;
+	u16 exp_cap_off;
+	u16 ext_cap_err_off;
+	u16 dev_sta;
+	u32 cor_err;
+	u32 root_err;
+	u32 uncor_err;
+	u32 apb_pe_err;
+	u32 apb_pe_int;
+
+	apb_pe_err = readl(bs->apb_base + BS1000_PCIE_APB_PE_ERR_STS);
+	apb_pe_int = readl(bs->apb_base + BS1000_PCIE_APB_PE_INT_STS);
+
+	exp_cap_off	= dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	ext_cap_err_off = dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_ERR);
+
+	uncor_err	= dw_pcie_readl_dbi(pci,
+					ext_cap_err_off + PCI_ERR_UNCOR_STATUS);
+	cor_err		= dw_pcie_readl_dbi(pci,
+					ext_cap_err_off + PCI_ERR_COR_STATUS);
+	root_err	= dw_pcie_readl_dbi(pci,
+					ext_cap_err_off + PCI_ERR_ROOT_STATUS);
+	dev_sta		= dw_pcie_readw_dbi(pci,
+					exp_cap_off + PCI_EXP_DEVSTA);
+
+	writel(apb_pe_err, bs->apb_base + BS1000_PCIE_APB_PE_ERR_STS);
+	writel(apb_pe_int, bs->apb_base + BS1000_PCIE_APB_PE_INT_STS);
+
+	dw_pcie_writel_dbi(pci,
+			   ext_cap_err_off + PCI_ERR_UNCOR_STATUS, uncor_err);
+	dw_pcie_writel_dbi(pci,
+			   ext_cap_err_off + PCI_ERR_COR_STATUS, cor_err);
+	dw_pcie_writel_dbi(pci,
+			   ext_cap_err_off + PCI_ERR_ROOT_STATUS, root_err);
+	dw_pcie_writew_dbi(pci,
+			   exp_cap_off + PCI_EXP_DEVSTA, dev_sta);
+
+	dev_err(dev,
+		"DevSta:0x%04x RootErr:0x%x UncorErr:0x%x CorErr:0x%x ApbErr:0x%x ApbInt:0x%x\n",
+		dev_sta, root_err, uncor_err, cor_err, apb_pe_err, apb_pe_int);
+
+	return IRQ_HANDLED;
+}
+
+static void bs1000_pcie_set_msi_vec_num(struct pcie_port *pp)
+{
+	pp->num_vectors = MAX_MSI_IRQS;
+}
+
+static int bs1000_pcie_msi_host_init(struct pcie_port *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct device *dev = pci->dev;
+	struct device_node *np = dev->of_node;
+	struct device_node *msi_node;
+
+	/*
+	 * The MSI domain is set by the generic of_msi_configure(). This
+	 * .msi_host_init() function keeps us from doing the default MSI
+	 * domain setup in dw_pcie_host_init() and also enforces the
+	 * requirement that "msi-parent" exists.
+	 */
+	msi_node = of_parse_phandle(np, "msi-parent", 0);
+	if (!msi_node) {
+		dev_err(dev, "failed to find msi-parent\n");
+		return -EINVAL;
+	}
+
+	of_node_put(msi_node);
+	return 0;
+}
+
+static const struct dw_pcie_host_ops bs1000_pcie_host_ops = {
+	.host_init	 = bs1000_pcie_host_init,
+	.msi_host_init	 = bs1000_pcie_msi_host_init,
+	.set_num_vectors = bs1000_pcie_set_msi_vec_num
+};
+
+static int bs1000_add_pcie_port(struct bs1000_pcie *bs,
+				struct platform_device *pdev)
+{
+	struct dw_pcie *pci = bs->pci;
+	struct pcie_port *pp = &pci->pp;
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	pp->irq = platform_get_irq_byname(pdev, "intr");
+	if (pp->irq < 0) {
+		dev_err(dev, "failed to get \"intr\" IRQ\n");
+		return pp->irq;
+	}
+
+	ret = devm_request_irq(dev, pp->irq, bs1000_pcie_intr_irq_handler,
+			       IRQF_SHARED, "bs1000-pcie-intr", bs);
+	if (ret) {
+		dev_err(dev, "failed to request IRQ %d\n", pp->irq);
+		return ret;
+	}
+
+	if (IS_ENABLED(CONFIG_PCI_MSI)) {
+		pp->msi_irq = platform_get_irq_byname(pdev, "msi");
+		if (pp->msi_irq < 0) {
+			dev_err(dev, "failed to get \"msi\" IRQ\n");
+			return pp->msi_irq;
+		}
+	}
+
+	pp->ops = &bs1000_pcie_host_ops;
+	ret = dw_pcie_host_init(pp);
+	if (ret) {
+		dev_err(dev, "failed to initialize host\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int bs1000_add_pcie_ep(struct bs1000_pcie *bs,
+			      struct platform_device *pdev)
+{
+	int ret;
+	struct dw_pcie_ep *ep;
+	struct resource *res;
+	struct device *dev = &pdev->dev;
+	struct dw_pcie *pci = bs->pci;
+
+	ep = &pci->ep;
+	ep->ops = &bs1000_pcie_ep_ops;
+
+	pci->dbi_base2 = pci->dbi_base + 0x100000;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "addr_space");
+	if (!res)
+		return -EINVAL;
+
+	ep->phys_base = res->start;
+	ep->addr_size = resource_size(res);
+
+	ret = dw_pcie_ep_init(ep);
+	if (ret) {
+		dev_err(dev, "failed to initialize endpoint\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct baikal_pcie_of_data bs1000_pcie_rc_of_data = {
+	.mode = DW_PCIE_RC_TYPE,
+	.dw_pcie_ops = {
+		.cpu_addr_fixup	= bs1000_pcie_cpu_addr_fixup,
+		.link_up	= bs1000_pcie_link_up,
+		.start_link	= bs1000_pcie_start_link,
+	},
+	.get_resources = bs1000_get_resources,
+};
+
+static const struct baikal_pcie_of_data bs1000_pcie_ep_of_data = {
+	.mode = DW_PCIE_EP_TYPE,
+	.dw_pcie_ops = {
+		.cpu_addr_fixup	= bs1000_pcie_cpu_addr_fixup,
+		.link_up	= bs1000_pcie_link_up,
+		.start_link	= bs1000_pcie_start_link,
+	},
+	.get_resources = bs1000_get_resources,
+};
+
+static const struct of_device_id bs1000_pcie_of_match[] = {
+	{
+		.compatible = "baikal,bs1000-pcie",
+		.data = &bs1000_pcie_rc_of_data,
+	},
+	{
+		.compatible = "baikal,bs1000-pcie-ep",
+		.data = &bs1000_pcie_ep_of_data,
+	},
+	{ },
+};
+
+static const struct dw_pcie_ops bs1000_pcie_ops = {
+	.cpu_addr_fixup	= bs1000_pcie_cpu_addr_fixup,
+	.link_up	= bs1000_pcie_link_up,
+	.start_link	= bs1000_pcie_start_link,
+};
+
+static int bs1000_pcie_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct bs1000_pcie *bs;
+	struct dw_pcie *pci;
+	int ret;
+	struct resource *res;
+	const struct baikal_pcie_of_data *data;
+	const struct of_device_id *match;
+	enum dw_pcie_device_mode mode;
+
+	match = of_match_device(bs1000_pcie_of_match, dev);
+	if (!match)
+		return -EINVAL;
+
+	data = (struct baikal_pcie_of_data *)match->data;
+	mode = (enum dw_pcie_device_mode)data->mode;
+
+	pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+	if (!pci)
+		return -ENOMEM;
+
+	pci->dev = dev;
+	pci->ops = &bs1000_pcie_ops;
+
+	bs = devm_kzalloc(dev, sizeof(*bs), GFP_KERNEL);
+	if (!bs)
+		return -ENOMEM;
+
+	bs->pci = pci;
+
+	platform_set_drvdata(pdev, bs);
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi");
+	if (!res) {
+		dev_err(dev, "failed to find \"dbi\" region\n");
+		return -EINVAL;
+	}
+
+	devm_request_resource(dev, &iomem_resource, res);
+	pci->dbi_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pci->dbi_base)) {
+		dev_err(dev, "error with ioremap\n");
+		return PTR_ERR(pci->dbi_base);
+	}
+
+	if (res->start == BS1000_PCIE0_P0_DBI_BASE ||
+	    res->start == BS1000_PCIE0_P1_DBI_BASE ||
+	    res->start == BS1000_PCIE1_P0_DBI_BASE ||
+	    res->start == BS1000_PCIE1_P1_DBI_BASE ||
+	    res->start == BS1000_PCIE2_P0_DBI_BASE ||
+	    res->start == BS1000_PCIE2_P1_DBI_BASE) {
+		bs->cpu_addr_mask = 0x7fffffffff;
+	} else {
+		bs->cpu_addr_mask = 0xffffffffff;
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "apb");
+	if (res) {
+		devm_request_resource(dev, &iomem_resource, res);
+		bs->apb_base = devm_ioremap_resource(dev, res);
+		if (IS_ERR(bs->apb_base))
+			return PTR_ERR(bs->apb_base);
+	} else {
+		dev_err(dev, "missing *apb* reg space\n");
+		return -EINVAL;
+	}
+
+	switch (mode) {
+	case DW_PCIE_RC_TYPE:
+		ret = bs1000_add_pcie_port(bs, pdev);
+		if (ret < 0)
+			return ret;
+
+		break;
+	case DW_PCIE_EP_TYPE:
+		ret = bs1000_add_pcie_ep(bs, pdev);
+		if (ret < 0)
+			return ret;
+
+		break;
+	default:
+		dev_err(dev, "INVALID device type %d\n", mode);
+	}
+
+	return 0;
+}
+
+static struct platform_driver bs1000_pcie_driver = {
+	.driver = {
+		.name = "baikal-bs1000-pcie",
+		.of_match_table = bs1000_pcie_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = bs1000_pcie_probe
+};
+builtin_platform_driver(bs1000_pcie_driver);
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
+static void dw_pcie_writel_ob_unroll(struct dw_pcie *pci, u32 index, u32 reg,
+				     u32 val)
+{
+	dw_pcie_writel_atu(pci, PCIE_GET_ATU_OUTB_UNR_REG_OFFSET(index) + reg, val);
+}
+
+static void bs1000_pcie_prog_outbound_atu(struct dw_pcie *pci, int index,
+					  int type, u64 cpu_addr, u64 pci_addr,
+					  u32 size, u32 flags)
+{
+	u32 retries, val;
+
+	cpu_addr = bs1000_pcie_cpu_addr_fixup(pci, cpu_addr);
+
+	dw_pcie_writel_ob_unroll(pci, index, PCIE_ATU_UNR_LOWER_BASE,
+				 lower_32_bits(cpu_addr));
+	dw_pcie_writel_ob_unroll(pci, index, PCIE_ATU_UNR_UPPER_BASE,
+				 upper_32_bits(cpu_addr));
+	dw_pcie_writel_ob_unroll(pci, index, PCIE_ATU_UNR_LIMIT,
+				 lower_32_bits(cpu_addr + size - 1));
+	dw_pcie_writel_ob_unroll(pci, index, PCIE_ATU_UNR_LOWER_TARGET,
+				 lower_32_bits(pci_addr));
+	dw_pcie_writel_ob_unroll(pci, index, PCIE_ATU_UNR_UPPER_TARGET,
+				 upper_32_bits(pci_addr));
+	dw_pcie_writel_ob_unroll(pci, index, PCIE_ATU_UNR_REGION_CTRL1, type);
+	dw_pcie_writel_ob_unroll(pci, index, PCIE_ATU_UNR_REGION_CTRL2,
+				 PCIE_ATU_ENABLE | flags);
+
+	/*
+	 * Make sure ATU enable takes effect before any subsequent config
+	 * and I/O accesses.
+	 */
+	for (retries = 0; retries < LINK_WAIT_MAX_IATU_RETRIES; retries++) {
+		val = dw_pcie_readl_atu(pci,
+					PCIE_GET_ATU_OUTB_UNR_REG_OFFSET(index) +
+					PCIE_ATU_UNR_REGION_CTRL2);
+		if (val & PCIE_ATU_ENABLE)
+			return;
+
+		mdelay(LINK_WAIT_IATU);
+	}
+	dev_err(pci->dev, "Outbound iATU is not being enabled\n");
+}
+
+static void bs1000_pcie_setup_rc_acpi(struct pcie_port *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	acpi_status status;
+	u64 lanes;
+	u32 val;
+
+	/*
+	 * Enable DBI read-only registers for writing/updating configuration.
+	 * Write permission gets disabled towards the end of this function.
+	 */
+	dw_pcie_dbi_ro_wr_en(pci);
+
+	status = acpi_evaluate_integer(to_acpi_device(pci->dev)->handle,
+				       "NUML", NULL, &lanes);
+	if (ACPI_FAILURE(status)) {
+		dev_dbg(pci->dev, "failed to get num-lanes\n");
+	} else {
+		/* Set the number of lanes */
+		val = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL);
+		val &= ~PORT_LINK_MODE_MASK;
+		switch (lanes) {
+		case 1:
+			val |= PORT_LINK_MODE_1_LANES;
+			break;
+		case 2:
+			val |= PORT_LINK_MODE_2_LANES;
+			break;
+		case 4:
+			val |= PORT_LINK_MODE_4_LANES;
+			break;
+		case 8:
+			val |= PORT_LINK_MODE_8_LANES;
+			break;
+		default:
+			dev_err(pci->dev, "NUML %llu: invalid value\n", lanes);
+			goto skip_lanes;
+		}
+
+		dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val);
+
+		/* Set link width speed control register */
+		val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+		val &= ~PORT_LOGIC_LINK_WIDTH_MASK;
+		switch (lanes) {
+		case 1:
+			val |= PORT_LOGIC_LINK_WIDTH_1_LANES;
+			break;
+		case 2:
+			val |= PORT_LOGIC_LINK_WIDTH_2_LANES;
+			break;
+		case 4:
+			val |= PORT_LOGIC_LINK_WIDTH_4_LANES;
+			break;
+		case 8:
+			val |= PORT_LOGIC_LINK_WIDTH_8_LANES;
+			break;
+		}
+		dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
+	}
+
+skip_lanes:
+	/* Setup RC BARs */
+	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, 0x00000004);
+	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_1, 0x00000000);
+
+	/* Setup interrupt pins */
+	val = dw_pcie_readl_dbi(pci, PCI_INTERRUPT_LINE);
+	val &= 0xffff00ff;
+	val |= 0x00000100;
+	dw_pcie_writel_dbi(pci, PCI_INTERRUPT_LINE, val);
+
+	/* Setup bus numbers */
+	val = dw_pcie_readl_dbi(pci, PCI_PRIMARY_BUS);
+	val &= 0xff000000;
+	val |= 0x00ff0100;
+	dw_pcie_writel_dbi(pci, PCI_PRIMARY_BUS, val);
+
+	/* Setup command register */
+	val = dw_pcie_readl_dbi(pci, PCI_COMMAND);
+	val &= 0xffff0000;
+	val |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
+		PCI_COMMAND_MASTER | PCI_COMMAND_SERR;
+	dw_pcie_writel_dbi(pci, PCI_COMMAND, val);
+
+	/* Program ATU */
+	bs1000_pcie_prog_outbound_atu(pci, 0, PCIE_ATU_TYPE_CFG0,
+				      pp->cfg0_base, 0,
+				      pp->cfg0_size,
+				      PCIE_IATU_REGION_CTRL_2_REG_SHIFT_MODE);
+	bs1000_pcie_prog_outbound_atu(pci, 1, PCIE_ATU_TYPE_CFG1,
+				      pp->cfg1_base, 0,
+				      pp->cfg1_size,
+				      PCIE_IATU_REGION_CTRL_2_REG_SHIFT_MODE);
+	bs1000_pcie_prog_outbound_atu(pci, 2, PCIE_ATU_TYPE_MEM,
+				      pp->mem_base, pp->mem_bus_addr,
+				      pp->mem_size, 0);
+	bs1000_pcie_prog_outbound_atu(pci, 3, PCIE_ATU_TYPE_IO,
+				      pp->io_base, pp->io_bus_addr,
+				      pp->io_size, 0);
+
+	dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, 0);
+
+	/* Program correct class for RC */
+	dw_pcie_writew_dbi(pci, PCI_CLASS_DEVICE, PCI_CLASS_BRIDGE_PCI);
+
+	val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+	val |= PORT_LOGIC_SPEED_CHANGE;
+	dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
+
+	dw_pcie_dbi_ro_wr_dis(pci);
+}
+
+static int bs1000_pcie_get_res_acpi(struct acpi_device *adev,
+				    struct acpi_device **res_dev,
+				    struct bs1000_pcie *bs)
+{
+	struct device *dev = &adev->dev;
+	struct dw_pcie *pci = bs->pci;
+	struct pcie_port *pp = &bs->pci->pp;
+	struct resource_entry *entry;
+	struct list_head list, *pos;
+	struct fwnode_handle *fwnode;
+	int ret;
+	unsigned long flags = IORESOURCE_MEM;
+
+	fwnode = fwnode_get_named_child_node(&adev->fwnode, "RES0");
+	if (!fwnode) {
+		dev_err(dev, "failed to get RES0 subdevice\n");
+		return -EINVAL;
+	}
+
+	*res_dev = to_acpi_device_node(fwnode);
+	if (!*res_dev) {
+		dev_err(dev, "RES0 is not an acpi device node\n");
+		return -EINVAL;
+	}
+
+	INIT_LIST_HEAD(&list);
+	ret = acpi_dev_get_resources(*res_dev, &list,
+				     acpi_dev_filter_resource_type_cb,
+				     (void *)flags);
+	if (ret < 0) {
+		dev_err(dev, "failed to parse RES0._CRS method, error code %d\n", ret);
+		return ret;
+	}
+
+	if (ret != 3) {
+		dev_err(dev, "invalid number of MEM resources present in RES0._CRS (%i, need 3)\n", ret);
+		return -EINVAL;
+	}
+
+	/* ECAM */
+	pos = list.next;
+	entry = list_entry(pos, struct resource_entry, node);
+	pp->cfg0_size = resource_size(entry->res);
+	pp->cfg1_size = pp->cfg0_size;
+	pp->cfg0_base = entry->res->start;
+	pp->cfg1_base = pp->cfg0_base;
+
+	/* DBI */
+	pos = pos->next;
+	entry = list_entry(pos, struct resource_entry, node);
+	if (entry->res->start == BS1000_PCIE0_P0_DBI_BASE ||
+	    entry->res->start == BS1000_PCIE0_P1_DBI_BASE ||
+	    entry->res->start == BS1000_PCIE1_P0_DBI_BASE ||
+	    entry->res->start == BS1000_PCIE1_P1_DBI_BASE ||
+	    entry->res->start == BS1000_PCIE2_P0_DBI_BASE ||
+	    entry->res->start == BS1000_PCIE2_P1_DBI_BASE) {
+		bs->cpu_addr_mask = 0x7fffffffff;
+	} else {
+		bs->cpu_addr_mask = 0xffffffffff;
+	}
+
+	pci->dbi_base = devm_ioremap_resource(dev, entry->res);
+	if (IS_ERR(pci->dbi_base)) {
+		dev_err(dev, "error with dbi ioremap\n");
+		ret = PTR_ERR(pci->dbi_base);
+		return ret;
+	}
+
+	pci->iatu_unroll_enabled = true;
+	pci->atu_base = pci->dbi_base + DEFAULT_DBI_ATU_OFFSET;
+
+	/* APB */
+	pos = pos->next;
+	entry = list_entry(pos, struct resource_entry, node);
+	bs->apb_base = devm_ioremap_resource(dev, entry->res);
+	if (IS_ERR(bs->apb_base)) {
+		dev_err(dev, "error with apb ioremap\n");
+		ret = PTR_ERR(bs->apb_base);
+		return ret;
+	}
+
+	acpi_dev_free_resource_list(&list);
+
+	/* Non-prefetchable memory */
+	INIT_LIST_HEAD(&list);
+	flags = IORESOURCE_MEM;
+	ret = acpi_dev_get_resources(adev, &list,
+				     acpi_dev_filter_resource_type_cb,
+				     (void *)flags);
+	if (ret < 0) {
+		dev_err(dev, "failed to parse _CRS method, error code %d\n", ret);
+		return ret;
+	}
+
+	if (ret != 1) {
+		dev_err(dev, "invalid number of MEM resources present in _CRS (%i, need 1)\n", ret);
+		return -EINVAL;
+	}
+
+	pos = list.next;
+	entry = list_entry(pos, struct resource_entry, node);
+	pp->mem_base = entry->res->start;
+	pp->mem_size = resource_size(entry->res);
+	pp->mem_bus_addr = entry->res->start - entry->offset;
+
+	acpi_dev_free_resource_list(&list);
+
+	/* I/O */
+	INIT_LIST_HEAD(&list);
+	flags = IORESOURCE_IO;
+	ret = acpi_dev_get_resources(adev, &list,
+				     acpi_dev_filter_resource_type_cb,
+				     (void *)flags);
+	if (ret < 0) {
+		dev_err(dev, "failed to parse _CRS method, error code %d\n", ret);
+		return ret;
+	}
+
+	if (ret != 1) {
+		dev_err(dev, "invalid number of IO resources present in _CRS (%i, need 1)\n", ret);
+		return -EINVAL;
+	}
+
+	pos = list.next;
+	entry = list_entry(pos, struct resource_entry, node);
+	pp->io_base = entry->res->start;
+	pp->io_size = resource_size(entry->res);
+	pp->io_bus_addr = entry->res->start - entry->offset;
+
+	acpi_dev_free_resource_list(&list);
+	return 0;
+}
+
+static int bs1000_pcie_get_irq_acpi(struct device *dev,
+				    struct acpi_device *res_dev,
+				    struct bs1000_pcie *bs)
+{
+	struct pcie_port *pp = &bs->pci->pp;
+	struct resource res;
+	int ret;
+
+	memset(&res, 0, sizeof(res));
+
+	ret = acpi_irq_get(res_dev->handle, 0, &res);
+	if (ret) {
+		dev_err(dev, "failed to get irq %d\n", 0);
+		return ret;
+	}
+
+	if (res.flags & IORESOURCE_BITS) {
+		struct irq_data *irqd;
+
+		irqd = irq_get_irq_data(res.start);
+		if (!irqd) {
+			return -ENXIO;
+		}
+
+		irqd_set_trigger_type(irqd, res.flags & IORESOURCE_BITS);
+	}
+
+	pp->irq = res.start;
+
+	ret = devm_request_irq(dev, pp->irq, bs1000_pcie_intr_irq_handler,
+			       IRQF_SHARED, "bs1000-pcie-intr", bs);
+	if (ret) {
+		dev_err(dev, "failed to request IRQ %d\n", pp->irq);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int bs1000_get_acpi_data(struct device *dev, struct bs1000_pcie *bs)
+{
+	struct acpi_device *adev = to_acpi_device(dev), *res_dev;
+	acpi_status status = AE_OK;
+	u64 num_viewport;
+	int ret;
+
+	ret = bs1000_pcie_get_res_acpi(adev, &res_dev, bs);
+	if (ret) {
+		dev_err(dev, "failed to get resource info\n");
+		return ret;
+	}
+
+	ret = bs1000_pcie_get_irq_acpi(dev, res_dev, bs);
+	if (ret) {
+		dev_err(dev, "failed to get irq info\n");
+		return ret;
+	}
+
+	status = acpi_evaluate_integer(adev->handle, "NUMV", NULL,
+				       &num_viewport);
+	if (ACPI_FAILURE(status)) {
+		dev_err(dev, "failed to get num-viewport\n");
+		return -EINVAL;
+	}
+	bs->pci->num_viewport = num_viewport;
+
+	return 0;
+}
+
+static int bs1000_pcie_init(struct pci_config_window *cfg)
+{
+	struct device *dev = cfg->parent;
+	struct bs1000_pcie *bs;
+	struct dw_pcie *pci;
+	struct pcie_port *pp;
+	int ret;
+
+	pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
+	if (!pci)
+		return -ENOMEM;
+
+	pci->dev = dev;
+	pci->ops = &bs1000_pcie_ops;
+
+	bs = devm_kzalloc(dev, sizeof(*bs), GFP_KERNEL);
+	if (!bs)
+		return -ENOMEM;
+
+	cfg->priv = bs;
+	bs->pci = pci;
+	dev_set_drvdata(dev, bs);
+
+	ret = bs1000_get_acpi_data(dev, bs);
+	if (ret) {
+		dev_err(dev, "failed to get data from ACPI\n");
+		return ret;
+	}
+
+	pp = &pci->pp;
+	raw_spin_lock_init(&pp->lock);
+	pp->ops = &bs1000_pcie_host_ops;
+	pp->root_bus_nr = cfg->busr.start;
+	pp->busn = &cfg->busr;
+	pp->va_cfg0_base = devm_pci_remap_cfgspace(dev, pp->cfg0_base,
+						   pp->cfg0_size);
+	if (!pp->va_cfg0_base) {
+		dev_err(dev, "error with ioremap\n");
+		return -ENOMEM;
+	}
+	pp->va_cfg1_base = pp->va_cfg0_base;
+
+	ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64));
+	if (ret) {
+		dev_err(dev, "failed to enable DMA\n");
+		return ret;
+	}
+
+	ret = bs1000_pcie_host_init(pp);
+	if (ret) {
+		dev_err(dev, "failed to initialize host\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void __iomem *bs1000_pcie_map_bus(struct pci_bus *bus,
+					 unsigned int devfn, int where)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	struct bs1000_pcie *bs = cfg->priv;
+	unsigned int devfn_shift = cfg->ops->bus_shift - 8;
+	unsigned int busn = bus->number;
+	void __iomem *base;
+
+	if (bs->pci->pp.root_bus == NULL)
+		bs->pci->pp.root_bus = bus;
+
+	if (bus->number != cfg->busr.start && !bs1000_pcie_link_up(bs->pci))
+		return NULL;
+
+	if (bus->number == cfg->busr.start) {
+		/*
+		 * The DW PCIe core doesn't filter out transactions to other
+		 * devices/functions on the root bus num, so we do this here.
+		 */
+		if (PCI_SLOT(devfn) > 0)
+			return NULL;
+		else
+			return bs->pci->dbi_base + where;
+	}
+
+	if (busn < cfg->busr.start || busn > cfg->busr.end)
+		return NULL;
+
+	busn -= cfg->busr.start;
+	base = cfg->win + (busn << cfg->ops->bus_shift);
+	return base + (devfn << devfn_shift) + where;
+}
+
+struct pci_ecam_ops baikal_s_pcie_ecam_ops = {
+	.bus_shift	= 20,
+	.init		= bs1000_pcie_init,
+	.pci_ops	= {
+		.map_bus	= bs1000_pcie_map_bus,
+		.read		= pci_generic_config_read,
+		.write		= pci_generic_config_write
+	}
+};
+#endif
diff --git a/drivers/pci/controller/dwc/pcie-baikal-tune.c b/drivers/pci/controller/dwc/pcie-baikal-tune.c
new file mode 100644
index 0000000000000..e11763a25dfd5
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-baikal-tune.c
@@ -0,0 +1,575 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * BM1000 PCIe Controller & Phy Gen3 equalization parameters fine tune.
+ *
+ * Copyright (C) 2023 Baikal Electronics, JSC
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/debugfs.h>
+
+#include "pcie-designware.h"
+#include "pcie-baikal.h"
+
+static int gen3_eq_fb_mode = -1;
+module_param(gen3_eq_fb_mode, int, 0444);
+MODULE_PARM_DESC(gen3_eq_fb_mode, "feedback mode (0 - FOM + DIR, 1 - FOM only)");
+static int gen3_eq_psets = -1;
+module_param(gen3_eq_psets, int, 0444);
+MODULE_PARM_DESC(gen3_eq_psets, "initial presets");
+static int phy_rx_agc = -1;
+module_param(phy_rx_agc, int, 0444);
+MODULE_PARM_DESC(phy_rx_agc,
+	"Phy RX AGC gain ([7:4] - Pre-CTLE gain, [3:0] - Post-CTLE gain)");
+static int phy_rx_ctle = -1;
+module_param(phy_rx_ctle, int, 0444);
+MODULE_PARM_DESC(phy_rx_ctle, "Phy RX CTLE control ([7:4] - zero, [3:0] - pole)");
+static int phy_rx_dfe = -1;
+module_param(phy_rx_dfe, int, 0444);
+MODULE_PARM_DESC(phy_rx_dfe, "Phy RX DFE control (0 - disable, 1 - enable)");
+static int phy_tx_gain = -1;
+module_param(phy_tx_gain, int, 0444);
+MODULE_PARM_DESC(phy_tx_gain, "Phy TX gain value");
+static int phy_tx_turbo = -1;
+module_param(phy_tx_turbo, int, 0444);
+MODULE_PARM_DESC(phy_tx_turbo, "Phy TX turbo mode (0 - disable, 1 - enable)");
+static int phy_rx_ctle_pole = -1;
+module_param(phy_rx_ctle_pole, int, 0444);
+MODULE_PARM_DESC(phy_rx_ctle_pole, "Phy RX CTLE pole range for VMA adaptation "
+				   "([7:4] - max, [3:0] - min)");
+static bool debugfs = false;
+module_param(debugfs, bool, 0444);
+MODULE_PARM_DESC(debugfs, "Phy debugfs monitor enable");
+static bool notune = false;
+module_param(notune, bool, 0444);
+MODULE_PARM_DESC(notune, "Gen3 equalization fine tune disable");
+
+static void bm1000_pcie_tune_debugfs_populate(struct dw_pcie *pci);
+
+/* Baikal-M PCIe PHY registers access */
+#define BM1000_PCIE_AXI2MGM_LINENUM			0xd04
+#define BM1000_PCIE_AXI2MGM_LINENUM_LANE_SEL_MASK	GENMASK(7, 0)
+#define BM1000_PCIE_AXI2MGM_ADDRCTL			0xd08
+#define BM1000_PCIE_AXI2MGM_ADDRCTL_BUSY		BIT(31)
+#define BM1000_PCIE_AXI2MGM_ADDRCTL_DONE		BIT(30)
+#define BM1000_PCIE_AXI2MGM_ADDRCTL_RW_FLAG		BIT(29)
+#define BM1000_PCIE_AXI2MGM_ADDRCTL_PHY_ADDR_MASK	GENMASK(20, 0)
+#define BM1000_PCIE_AXI2MGM_WRITEDATA			0xd0c
+#define BM1000_PCIE_AXI2MGM_WRITEDATA_DATA_MASK		GENMASK(15, 0)
+#define BM1000_PCIE_AXI2MGM_READDATA			0xd10
+#define BM1000_PCIE_AXI2MGM_READDATA_DATA_MASK		GENMASK(15, 0)
+
+#define BM1000_PCIE_PHY_REG_RETRIES		10
+#define BM1000_PCIE_PHY_REG_RETRY_TIMEOUT	100
+
+static int bm1000_pcie_phy_done(struct dw_pcie *pci)
+{
+	u32 reg;
+	int retries;
+
+	for (retries = 0; retries < BM1000_PCIE_PHY_REG_RETRIES; ++retries) {
+		reg = dw_pcie_readl_dbi(pci, BM1000_PCIE_AXI2MGM_ADDRCTL);
+		if (reg & BM1000_PCIE_AXI2MGM_ADDRCTL_DONE)
+			return 0;
+		udelay(BM1000_PCIE_PHY_REG_RETRY_TIMEOUT);
+	}
+	return -ETIMEDOUT;
+}
+
+static int bm1000_pcie_phy_read(struct dw_pcie *pci, u8 lane,
+				u32 addr, u16 *value)
+{
+	int ret;
+
+	bm1000_pcie_phy_enable(pci);
+	dw_pcie_writel_dbi(pci, BM1000_PCIE_AXI2MGM_LINENUM, lane);
+	dw_pcie_writel_dbi(pci, BM1000_PCIE_AXI2MGM_ADDRCTL,
+			   addr & BM1000_PCIE_AXI2MGM_ADDRCTL_PHY_ADDR_MASK);
+	ret = bm1000_pcie_phy_done(pci);
+	if (ret == 0)
+		*value = dw_pcie_readl_dbi(pci, BM1000_PCIE_AXI2MGM_READDATA) &
+			 BM1000_PCIE_AXI2MGM_READDATA_DATA_MASK;
+	bm1000_pcie_phy_disable(pci);
+	return ret;
+}
+
+static int bm1000_pcie_phy_write(struct dw_pcie *pci, u8 lanes,
+				 u32 addr, u16 value)
+{
+	int ret;
+
+	bm1000_pcie_phy_enable(pci);
+	dw_pcie_writel_dbi(pci, BM1000_PCIE_AXI2MGM_LINENUM, lanes);
+	dw_pcie_writel_dbi(pci, BM1000_PCIE_AXI2MGM_WRITEDATA, value);
+	dw_pcie_writel_dbi(pci, BM1000_PCIE_AXI2MGM_ADDRCTL,
+			   (addr & BM1000_PCIE_AXI2MGM_ADDRCTL_PHY_ADDR_MASK) |
+			   BM1000_PCIE_AXI2MGM_ADDRCTL_RW_FLAG);
+	ret = bm1000_pcie_phy_done(pci);
+	bm1000_pcie_phy_disable(pci);
+	return ret;
+}
+
+/* Baikal-M PCIe RX/TX equalizers fine tune */
+#define BM1000_PCIE_GEN3_EQ_CONTROL			0x8a8
+#define BM1000_PCIE_GEN3_EQ_FOM_INC_INITIAL_EVAL	BIT(24)
+#define BM1000_PCIE_GEN3_EQ_PSET_REQ_VEC_MASK		GENMASK(23, 8)
+#define BM1000_PCIE_GEN3_EQ_FB_MODE_MASK		GENMASK(3, 0)
+
+#define BM1000_PCIE_PHY_RX_CFG_2					0x18002
+#define BM1000_PCIE_PHY_RX_CFG_2_PCS_SDS_RX_AGC_MVAL			GENMASK(9, 0)
+#define BM1000_PCIE_PHY_RX_CFG_5					0x18005
+#define BM1000_PCIE_PHY_RX_CFG_5_RX_AGC_MEN_OVRRD_EN			BIT(4)
+#define BM1000_PCIE_PHY_RX_CFG_5_RX_AGC_MEN_OVRRD_VAL			BIT(3)
+#define BM1000_PCIE_PHY_RX_LOOP_CTRL					0x18009
+#define BM1000_PCIE_PHY_RX_LOOP_CTRL_CFG_RX_LCTRL_LCTRL_MEN		BIT(8)
+#define BM1000_PCIE_PHY_RX_CTLE_CTRL					0x1800b
+#define BM1000_PCIE_PHY_RX_CTLE_CTRL_PCS_SDS_RX_CTLE_ZERO_MASK		GENMASK(13, 10)
+#define BM1000_PCIE_PHY_RX_CTLE_CTRL_RX_CTLE_POLE_OVRRD_EN		BIT(9)
+#define BM1000_PCIE_PHY_RX_CTLE_CTRL_RX_CTLE_POLE_OVRRD_VAL_MASK	GENMASK(8, 5)
+#define BM1000_PCIE_PHY_RX_CTLE_CTRL_PCS_SDS_RX_CTLE_POLE_MAX_MASK	GENMASK(4, 3)
+#define BM1000_PCIE_PHY_RX_CTLE_CTRL_PCS_SDS_RX_CTLE_POLE_MIN_MASK	GENMASK(2, 1)
+#define BM1000_PCIE_PHY_RX_CTLE_CTRL_PCS_SDS_RX_CTLE_POLE_STEP		BIT(0)
+#define BM1000_PCIE_PHY_TX_CFG_1					0x18016
+#define BM1000_PCIE_PHY_TX_CFG_1_TX_VBOOST_EN_OVRRD_EN			BIT(11)
+#define BM1000_PCIE_PHY_TX_CFG_1_TX_TURBO_EN_OVRRD_EN			BIT(10)
+#define BM1000_PCIE_PHY_TX_CFG_3					0x18018
+#define BM1000_PCIE_PHY_TX_CFG_3_CFG_TX_VBOOST_EN			BIT(14)
+#define BM1000_PCIE_PHY_TX_CFG_3_PCS_SDS_TX_GAIN_MASK			GENMASK(6, 4)
+#define BM1000_PCIE_PHY_TX_CFG_3_CFG_TX_TURBO_EN			BIT(0)
+#define BM1000_PCIE_PHY_RX_PWR_MON_1					0x1802a
+#define BM1000_PCIE_PHY_RX_PWR_MON_1_RX_PWRSM_LANE_PWR_OFF		BIT(4)
+#define BM1000_PCIE_PHY_TX_PWR_MON_0					0x1802c
+#define BM1000_PCIE_PHY_TX_PWR_MON_0_TX_PWRSM_LANE_PWR_OFF		BIT(15)
+#define BM1000_PCIE_PHY_RX_AEQ_VALBBD_0					0x18048
+#define BM1000_PCIE_PHY_RX_AEQ_VALBBD_1					0x18049
+#define BM1000_PCIE_PHY_RX_AEQ_VALBBD_2					0x1804a
+#define BM1000_PCIE_PHY_RX_AEQ_VALBBD_2_RX_DFE_OVRD_EN			BIT(5)
+#define BM1000_PCIE_PHY_RX_AEQ_VALBBD_2_RX_DFE_C5_MEN_OVRD_VAL		BIT(4)
+#define BM1000_PCIE_PHY_RX_AEQ_VALBBD_2_RX_DFE_C4_MEN_OVRD_VAL		BIT(3)
+#define BM1000_PCIE_PHY_RX_AEQ_VALBBD_2_RX_DFE_C3_MEN_OVRD_VAL		BIT(2)
+#define BM1000_PCIE_PHY_RX_AEQ_VALBBD_2_RX_DFE_C2_MEN_OVRD_VAL		BIT(1)
+#define BM1000_PCIE_PHY_RX_AEQ_VALBBD_2_RX_DFE_C1_MEN_OVRD_VAL		BIT(0)
+
+void bm1000_pcie_tune(struct dw_pcie *pci)
+{
+	struct device *dev = pci->dev;
+	struct device_node *np = dev->of_node;
+	u16 exp_cap_off;
+	u8 lane = 0, lanes = 0;
+	int override;
+	int i, n_lanes;
+	int ret;
+
+	if (notune)
+		return;
+
+	/* Search for active lanes */
+	exp_cap_off = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	n_lanes = FIELD_GET(PCI_EXP_LNKCAP_MLW,
+			    dw_pcie_readw_dbi(pci, exp_cap_off + PCI_EXP_LNKCAP));
+	for (i = 0; i < n_lanes; i++) {
+		u16 reg;
+		u8 mask = 1 << i;
+
+		ret = bm1000_pcie_phy_read(pci, mask,
+					   BM1000_PCIE_PHY_RX_PWR_MON_1, &reg);
+		if (ret != 0 ||
+		    FIELD_GET(BM1000_PCIE_PHY_RX_PWR_MON_1_RX_PWRSM_LANE_PWR_OFF,
+			      reg) == 1)
+			continue;
+		ret = bm1000_pcie_phy_read(pci, mask,
+					   BM1000_PCIE_PHY_TX_PWR_MON_0, &reg);
+		if (ret != 0 ||
+		    FIELD_GET(BM1000_PCIE_PHY_TX_PWR_MON_0_TX_PWRSM_LANE_PWR_OFF,
+			      reg) == 1)
+			continue;
+		lanes |= mask;
+		if (lane == 0)
+			lane = mask;
+	}
+
+	/* Feedback mode */
+	override = gen3_eq_fb_mode;
+	if (override == -1)
+		of_property_read_u32(np, "bm1000,gen3-eq-fb-mode", &override);
+	if (override >= 0) {
+		u32 reg;
+
+		dev_dbg(dev, "Gen3 fb_mode = %d\n", override);
+		reg = dw_pcie_readl_dbi(pci, BM1000_PCIE_GEN3_EQ_CONTROL);
+		reg &= ~BM1000_PCIE_GEN3_EQ_FB_MODE_MASK;
+		reg |= FIELD_PREP(BM1000_PCIE_GEN3_EQ_FB_MODE_MASK, override);
+		dw_pcie_writel_dbi(pci, BM1000_PCIE_GEN3_EQ_CONTROL, reg);
+	}
+	/* Initial presets */
+	override = gen3_eq_psets;
+	if (override == -1)
+		of_property_read_u32(np, "bm1000,gen3-eq-psets", &override);
+	if (override >= 0) {
+		u32 reg;
+
+		reg = dw_pcie_readl_dbi(pci, BM1000_PCIE_GEN3_EQ_CONTROL);
+		dev_dbg(dev, "Gen3 initial presets = 0x%x\n", override);
+		reg &= ~(BM1000_PCIE_GEN3_EQ_PSET_REQ_VEC_MASK |
+			 BM1000_PCIE_GEN3_EQ_FOM_INC_INITIAL_EVAL);
+		reg |= FIELD_PREP(BM1000_PCIE_GEN3_EQ_PSET_REQ_VEC_MASK,
+				  override);
+		dw_pcie_writel_dbi(pci, BM1000_PCIE_GEN3_EQ_CONTROL, reg);
+	}
+	/* Phy RX AGC */
+	override = phy_rx_agc;
+	if (override == -1)
+		of_property_read_u32(np, "bm1000,phy-rx-agc", &override);
+	if (override >= 0) {
+		u16 reg;
+
+		ret = bm1000_pcie_phy_read(pci, lane, BM1000_PCIE_PHY_RX_CFG_2,
+					   &reg);
+		if (ret == 0) {
+			reg &= ~BM1000_PCIE_PHY_RX_CFG_2_PCS_SDS_RX_AGC_MVAL;
+			reg |= FIELD_PREP(
+				BM1000_PCIE_PHY_RX_CFG_2_PCS_SDS_RX_AGC_MVAL,
+				override);
+			ret = bm1000_pcie_phy_write(pci, lanes,
+						    BM1000_PCIE_PHY_RX_CFG_2,
+						    reg);
+		}
+		if (ret == 0)
+			ret = bm1000_pcie_phy_read(pci, lane,
+						   BM1000_PCIE_PHY_RX_CFG_5,
+						   &reg);
+		if (ret == 0) {
+			reg |= BM1000_PCIE_PHY_RX_CFG_5_RX_AGC_MEN_OVRRD_EN |
+			       BM1000_PCIE_PHY_RX_CFG_5_RX_AGC_MEN_OVRRD_VAL;
+			ret = bm1000_pcie_phy_write(pci, lanes,
+						    BM1000_PCIE_PHY_RX_CFG_5,
+						    reg);
+		}
+		dev_dbg(dev, "Phy RX AGC = 0x%04x (%d)\n", override, ret);
+	}
+	/* Rhy RX CTLE */
+	override = phy_rx_ctle;
+	if (override == -1)
+		of_property_read_u32(np, "bm1000,phy-rx-ctle", &override);
+	if (override >= 0) {
+		u16 reg;
+
+		ret = bm1000_pcie_phy_read(pci, lane,
+					   BM1000_PCIE_PHY_RX_CTLE_CTRL, &reg);
+		if (ret == 0) {
+			reg &= ~(BM1000_PCIE_PHY_RX_CTLE_CTRL_PCS_SDS_RX_CTLE_ZERO_MASK |
+				 BM1000_PCIE_PHY_RX_CTLE_CTRL_RX_CTLE_POLE_OVRRD_VAL_MASK);
+			reg |= BM1000_PCIE_PHY_RX_CTLE_CTRL_RX_CTLE_POLE_OVRRD_EN |
+			       FIELD_PREP(BM1000_PCIE_PHY_RX_CTLE_CTRL_RX_CTLE_POLE_OVRRD_VAL_MASK,
+					  override & 0xf) |
+			       FIELD_PREP(BM1000_PCIE_PHY_RX_CTLE_CTRL_PCS_SDS_RX_CTLE_ZERO_MASK,
+					  override >> 4);
+			ret = bm1000_pcie_phy_write(pci, lanes,
+						    BM1000_PCIE_PHY_RX_CTLE_CTRL,
+						    reg);
+		}
+		if (ret == 0)
+			ret = bm1000_pcie_phy_read(pci, lane,
+						   BM1000_PCIE_PHY_RX_LOOP_CTRL,
+						   &reg);
+		if (ret == 0) {
+			reg |= BM1000_PCIE_PHY_RX_LOOP_CTRL_CFG_RX_LCTRL_LCTRL_MEN;
+			ret = bm1000_pcie_phy_write(pci, lanes,
+						    BM1000_PCIE_PHY_RX_LOOP_CTRL,
+						    reg);
+		}
+		dev_dbg(dev, "Phy RX CTLE = 0x%04x (%d)\n", override, ret);
+	}
+	/* Phy RX DFE */
+	override = phy_rx_dfe;
+	if (override == -1)
+		of_property_read_u32(np, "bm1000,phy-rx-dfe", &override);
+	if (override == 0) { /* enabled by default - disable only */
+		u16 reg;
+
+		reg = BM1000_PCIE_PHY_RX_AEQ_VALBBD_2_RX_DFE_OVRD_EN;
+		reg |= BM1000_PCIE_PHY_RX_AEQ_VALBBD_2_RX_DFE_C1_MEN_OVRD_VAL |
+		       BM1000_PCIE_PHY_RX_AEQ_VALBBD_2_RX_DFE_C2_MEN_OVRD_VAL |
+		       BM1000_PCIE_PHY_RX_AEQ_VALBBD_2_RX_DFE_C3_MEN_OVRD_VAL |
+		       BM1000_PCIE_PHY_RX_AEQ_VALBBD_2_RX_DFE_C4_MEN_OVRD_VAL |
+		       BM1000_PCIE_PHY_RX_AEQ_VALBBD_2_RX_DFE_C5_MEN_OVRD_VAL;
+		ret = bm1000_pcie_phy_write(pci, lanes,
+					    BM1000_PCIE_PHY_RX_AEQ_VALBBD_2,
+					    reg);
+		if (ret == 0)
+			ret = bm1000_pcie_phy_read(pci, lane,
+						   BM1000_PCIE_PHY_RX_LOOP_CTRL,
+						   &reg);
+		if (ret == 0) {
+			reg |= BM1000_PCIE_PHY_RX_LOOP_CTRL_CFG_RX_LCTRL_LCTRL_MEN;
+			ret = bm1000_pcie_phy_write(pci, lanes,
+						    BM1000_PCIE_PHY_RX_LOOP_CTRL,
+						    reg);
+		}
+		if (ret == 0) {
+			reg = 0;
+			bm1000_pcie_phy_write(pci, lanes,
+					      BM1000_PCIE_PHY_RX_AEQ_VALBBD_0,
+					      reg);
+			bm1000_pcie_phy_write(pci, lanes,
+					      BM1000_PCIE_PHY_RX_AEQ_VALBBD_1,
+					      reg);
+		}
+		dev_dbg(dev, "Phy RX DFE = %d (%d)\n", override, ret);
+	}
+	/* Phy TX gain */
+	override = phy_tx_gain;
+	if (override == -1)
+		of_property_read_u32(np, "bm1000,phy-tx-gain", &override);
+	if (override >= 0) {
+		u16 reg;
+
+		ret = bm1000_pcie_phy_read(pci, lane, BM1000_PCIE_PHY_TX_CFG_3,
+					   &reg);
+		if (ret == 0) {
+			reg &= ~BM1000_PCIE_PHY_TX_CFG_3_PCS_SDS_TX_GAIN_MASK;
+			reg |= BM1000_PCIE_PHY_TX_CFG_3_CFG_TX_VBOOST_EN;
+			reg |= FIELD_PREP(BM1000_PCIE_PHY_TX_CFG_3_PCS_SDS_TX_GAIN_MASK,
+					  override);
+			ret = bm1000_pcie_phy_write(pci, lanes,
+						    BM1000_PCIE_PHY_TX_CFG_3,
+						    reg);
+		}
+		if (ret == 0)
+			ret = bm1000_pcie_phy_read(pci, lane,
+						   BM1000_PCIE_PHY_TX_CFG_1,
+						   &reg);
+		if (ret == 0) {
+			reg |= BM1000_PCIE_PHY_TX_CFG_1_TX_VBOOST_EN_OVRRD_EN;
+			ret = bm1000_pcie_phy_write(pci, lanes,
+						    BM1000_PCIE_PHY_TX_CFG_1,
+						    reg);
+		}
+		dev_dbg(dev, "Phy TX gain = 0x%x (%d)\n", override, ret);
+	}
+	/* Phy TX turbo */
+	override = phy_tx_turbo;
+	if (override == -1)
+		of_property_read_u32(np, "bm1000,phy-tx-turbo", &override);
+	if (override >= 0) {
+		u16 reg;
+
+		ret = bm1000_pcie_phy_read(pci, lane, BM1000_PCIE_PHY_TX_CFG_3,
+					   &reg);
+		if (ret == 0) {
+			if (override == 0)
+				reg &= ~BM1000_PCIE_PHY_TX_CFG_3_CFG_TX_TURBO_EN;
+			else
+				reg |= BM1000_PCIE_PHY_TX_CFG_3_CFG_TX_TURBO_EN;
+			ret = bm1000_pcie_phy_write(pci, lanes,
+						    BM1000_PCIE_PHY_TX_CFG_3,
+						    reg);
+		}
+		if (ret == 0)
+			ret = bm1000_pcie_phy_read(pci, lane,
+						   BM1000_PCIE_PHY_TX_CFG_1,
+						   &reg);
+		if (ret == 0) {
+			reg |= BM1000_PCIE_PHY_TX_CFG_1_TX_TURBO_EN_OVRRD_EN;
+			ret = bm1000_pcie_phy_write(pci, lanes,
+						    BM1000_PCIE_PHY_TX_CFG_1,
+						    reg);
+		}
+		dev_dbg(dev, "Phy TX turbo = %d (%d)\n", override, ret);
+	}
+	/* Phy RX CTLE pole range */
+	override = phy_rx_ctle_pole;
+	if (override == -1)
+		of_property_read_u32(np, "bm1000,phy-rx-ctle-pole", &override);
+	if (override >= 0) {
+		u16 reg;
+		u8 pole_max = (override >> 4) & 0xf, pole_min = override & 0xf;
+
+		ret = bm1000_pcie_phy_read(pci, lane, BM1000_PCIE_PHY_RX_CTLE_CTRL,
+					   &reg);
+		if (ret == 0) {
+			reg &= ~(BM1000_PCIE_PHY_RX_CTLE_CTRL_PCS_SDS_RX_CTLE_POLE_MAX_MASK |
+				 BM1000_PCIE_PHY_RX_CTLE_CTRL_PCS_SDS_RX_CTLE_POLE_MIN_MASK);
+			reg |= FIELD_PREP(BM1000_PCIE_PHY_RX_CTLE_CTRL_PCS_SDS_RX_CTLE_POLE_MAX_MASK,
+					  pole_max);
+			reg |= FIELD_PREP(BM1000_PCIE_PHY_RX_CTLE_CTRL_PCS_SDS_RX_CTLE_POLE_MIN_MASK,
+					  pole_min);
+			if (pole_max == pole_min)
+				reg &= ~BM1000_PCIE_PHY_RX_CTLE_CTRL_PCS_SDS_RX_CTLE_POLE_STEP;
+			else
+				reg |= BM1000_PCIE_PHY_RX_CTLE_CTRL_PCS_SDS_RX_CTLE_POLE_STEP;
+			ret = bm1000_pcie_phy_write(pci, lanes, BM1000_PCIE_PHY_RX_CTLE_CTRL,
+						    reg);
+		}
+		dev_dbg(dev, "Phy RX CTLE pole = 0x%04x (%d)\n", override, ret);
+	}
+
+	/* debugfs populate */
+	if (debugfs)
+		bm1000_pcie_tune_debugfs_populate(pci);
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#define BM1000_PCIE_PHY_SDS_PIN_MON_1				0x18027
+#define BM1000_PCIE_PHY_SDS_PIN_MON_1_PCS_SDS_TX_SWING		GENMASK(5, 1)
+#define BM1000_PCIE_PHY_SDS_PIN_MON_2				0x18028
+#define BM1000_PCIE_PHY_SDS_PIN_MON_2_PCS_SDS_TX_VBOOST_EN	BIT(10)
+#define BM1000_PCIE_PHY_SDS_PIN_MON_2_PCS_SDS_TX_TURBO_EN	BIT(9)
+#define BM1000_PCIE_PHY_SDS_PIN_MON_2_PCS_SDS_TX_POST_CURSOR	GENMASK(8, 4)
+#define BM1000_PCIE_PHY_SDS_PIN_MON_2_PCS_SDS_TX_PRE_CURSOR	GENMASK(3, 0)
+#define BM1000_PCIE_PHY_RX_PWR_MON_0				0x18029
+#define BM1000_PCIE_PHY_RX_PWR_MON_0_RX_PWRSM_AGC_EN		BIT(10)
+#define BM1000_PCIE_PHY_RX_PWR_MON_0_RX_PWRSM_DFE_EN		BIT(9)
+#define BM1000_PCIE_PHY_RX_PWR_MON_0_RX_PWRSM_CDR_EN		BIT(8)
+#define BM1000_PCIE_PHY_RX_AEQ_OUT_0				0x18050
+#define BM1000_PCIE_PHY_RX_AEQ_OUT_0_DFE_TAP5			GENMASK(9, 5)
+#define BM1000_PCIE_PHY_RX_AEQ_OUT_0_DFE_TAP4			GENMASK(4, 0)
+#define BM1000_PCIE_PHY_RX_AEQ_OUT_1				0x18051
+#define BM1000_PCIE_PHY_RX_AEQ_OUT_1_DFE_TAP3			GENMASK(14, 10)
+#define BM1000_PCIE_PHY_RX_AEQ_OUT_1_DFE_TAP2			GENMASK(9, 5)
+#define BM1000_PCIE_PHY_RX_AEQ_OUT_1_DFE_TAP1			GENMASK(4, 0)
+#define BM1000_PCIE_PHY_RX_AEQ_OUT_2				0x18052
+#define BM1000_PCIE_PHY_RX_AEQ_OUT_2_PRE_CTLE_GAIN		GENMASK(7, 4)
+#define BM1000_PCIE_PHY_RX_AEQ_OUT_2_POST_CTLE_GAIN		GENMASK(3, 0)
+#define BM1000_PCIE_PHY_RX_VMA_STATUS_0				0x18057
+#define BM1000_PCIE_PHY_RX_VMA_STATUS_0_CTLE_PEAK		GENMASK(5, 2)
+#define BM1000_PCIE_PHY_RX_VMA_STATUS_0_CTLE_POLE		GENMASK(1, 0)
+
+static void print_for_all_lanes(struct dw_pcie *pci, struct seq_file *s,
+				u8 n_lanes, u32 addr, u16 mask)
+{
+	int i;
+
+	for (i = 0; i < n_lanes; i++) {
+		u16 reg;
+		u8 lane = 1 << i;
+
+		if (bm1000_pcie_phy_read(pci, lane, addr, &reg) == 0)
+			seq_put_hex_ll(s, " ", (reg & mask) >> __bf_shf(mask), 0);
+		else
+			seq_puts(s, " ?");
+	}
+}
+
+static int bm1000_pcie_dbgfs_phy_mon_show(struct seq_file *s, void *data)
+{
+	struct dw_pcie *pci = s->private;
+	u8 n_lanes;
+	u16 exp_cap_off;
+
+	exp_cap_off = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	n_lanes = FIELD_GET(PCI_EXP_LNKCAP_MLW,
+			    dw_pcie_readw_dbi(pci, exp_cap_off + PCI_EXP_LNKCAP));
+
+	seq_puts(s, "sds_pin_mon:\n");
+	seq_puts(s, " tx_swing:\t");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_SDS_PIN_MON_1,
+			    BM1000_PCIE_PHY_SDS_PIN_MON_1_PCS_SDS_TX_SWING);
+	seq_puts(s, "\n");
+	seq_puts(s, " pre_cursor:\t");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_SDS_PIN_MON_2,
+			    BM1000_PCIE_PHY_SDS_PIN_MON_2_PCS_SDS_TX_PRE_CURSOR);
+	seq_puts(s, "\n");
+	seq_puts(s, " post_cursor:\t");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_SDS_PIN_MON_2,
+			    BM1000_PCIE_PHY_SDS_PIN_MON_2_PCS_SDS_TX_POST_CURSOR);
+	seq_puts(s, "\n");
+	seq_puts(s, " vboost_en:\t");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_SDS_PIN_MON_2,
+			    BM1000_PCIE_PHY_SDS_PIN_MON_2_PCS_SDS_TX_VBOOST_EN);
+	seq_puts(s, "\n");
+	seq_puts(s, " turbo_en:\t");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_SDS_PIN_MON_2,
+			    BM1000_PCIE_PHY_SDS_PIN_MON_2_PCS_SDS_TX_TURBO_EN);
+	seq_puts(s, "\n");
+	seq_puts(s, "rx_vma_status:\n");
+	seq_puts(s, " ctle_peak:\t");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_RX_VMA_STATUS_0,
+			    BM1000_PCIE_PHY_RX_VMA_STATUS_0_CTLE_PEAK);
+	seq_puts(s, "\n");
+	seq_puts(s, " ctle_pole:\t");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_RX_VMA_STATUS_0,
+			    BM1000_PCIE_PHY_RX_VMA_STATUS_0_CTLE_POLE);
+	seq_puts(s, "\n");
+	seq_puts(s, "rx_aeq_out:\n");
+	seq_puts(s, " pre_ctle_gain:\t");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_RX_AEQ_OUT_2,
+			    BM1000_PCIE_PHY_RX_AEQ_OUT_2_PRE_CTLE_GAIN);
+	seq_puts(s, "\n");
+	seq_puts(s, " post_ctle_gain:");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_RX_AEQ_OUT_2,
+			    BM1000_PCIE_PHY_RX_AEQ_OUT_2_POST_CTLE_GAIN);
+	seq_puts(s, "\n");
+	seq_puts(s, " dfe_tap1:\t");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_RX_AEQ_OUT_1,
+			    BM1000_PCIE_PHY_RX_AEQ_OUT_1_DFE_TAP1);
+	seq_puts(s, "\n");
+	seq_puts(s, " dfe_tap2:\t");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_RX_AEQ_OUT_1,
+			    BM1000_PCIE_PHY_RX_AEQ_OUT_1_DFE_TAP2);
+	seq_puts(s, "\n");
+	seq_puts(s, " dfe_tap3:\t");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_RX_AEQ_OUT_1,
+			    BM1000_PCIE_PHY_RX_AEQ_OUT_1_DFE_TAP3);
+	seq_puts(s, "\n");
+	seq_puts(s, " dfe_tap4:\t");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_RX_AEQ_OUT_0,
+			    BM1000_PCIE_PHY_RX_AEQ_OUT_0_DFE_TAP4);
+	seq_puts(s, "\n");
+	seq_puts(s, " dfe_tap5:\t");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_RX_AEQ_OUT_0,
+			    BM1000_PCIE_PHY_RX_AEQ_OUT_0_DFE_TAP5);
+	seq_puts(s, "\n");
+	seq_puts(s, "pwr_mon:\n");
+	seq_puts(s, " tx_pwr_off:\t");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_TX_PWR_MON_0,
+			    BM1000_PCIE_PHY_TX_PWR_MON_0_TX_PWRSM_LANE_PWR_OFF);
+	seq_puts(s, "\n");
+	seq_puts(s, " rx_pwr_off:\t");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_RX_PWR_MON_1,
+			    BM1000_PCIE_PHY_RX_PWR_MON_1_RX_PWRSM_LANE_PWR_OFF);
+	seq_puts(s, "\n");
+	seq_puts(s, " agc_en:\t");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_RX_PWR_MON_0,
+			    BM1000_PCIE_PHY_RX_PWR_MON_0_RX_PWRSM_AGC_EN);
+	seq_puts(s, "\n");
+	seq_puts(s, " dfe_en:\t");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_RX_PWR_MON_0,
+			    BM1000_PCIE_PHY_RX_PWR_MON_0_RX_PWRSM_DFE_EN);
+	seq_puts(s, "\n");
+	seq_puts(s, " cdr_en:\t");
+	print_for_all_lanes(pci, s, n_lanes, BM1000_PCIE_PHY_RX_PWR_MON_0,
+			    BM1000_PCIE_PHY_RX_PWR_MON_0_RX_PWRSM_CDR_EN);
+	seq_puts(s, "\n");
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(bm1000_pcie_dbgfs_phy_mon);
+
+static void bm1000_pcie_tune_debugfs_populate(struct dw_pcie *pci)
+{
+	struct dentry *root_dir;
+
+	root_dir = debugfs_create_dir(dev_name(pci->dev), NULL);
+	if (!root_dir) {
+		dev_warn(pci->dev, "%s: failed to create debugfs dir\n",
+			 __func__);
+		return;
+	}
+	if (!debugfs_create_file("phy_mon", S_IRUGO, root_dir, pci,
+				 &bm1000_pcie_dbgfs_phy_mon_fops))
+		dev_warn(pci->dev, "%s: failed to create phy_mon debugfs file\n",
+			 __func__);
+}
+
+#else
+
+static void bm1000_pcie_tune_debugfs_populate(struct dw_pcie *pci)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/pci/controller/dwc/pcie-baikal.h b/drivers/pci/controller/dwc/pcie-baikal.h
new file mode 100644
index 0000000000000..57834b6899eda
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-baikal.h
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe controller driver for Baikal Electronics SoCs
+ *
+ * Copyright (C) 2023 Baikal Electronics, JSC
+ */
+
+#ifndef _PCIE_BAIKAL_H
+#define _PCIE_BAIKAL_H
+
+void bm1000_pcie_phy_enable(struct dw_pcie *pci);
+void bm1000_pcie_phy_disable(struct dw_pcie *pci);
+
+void bm1000_pcie_tune(struct dw_pcie *pci);
+
+#endif /* _PCIE_BAIKAL_H */
diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
index 0263db2ac8746..40c2e4ce4f6a4 100644
--- a/drivers/phy/Kconfig
+++ b/drivers/phy/Kconfig
@@ -69,5 +69,6 @@ source "drivers/phy/socionext/Kconfig"
 source "drivers/phy/st/Kconfig"
 source "drivers/phy/tegra/Kconfig"
 source "drivers/phy/ti/Kconfig"
+source "drivers/phy/baikal/Kconfig"
 
 endmenu
diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile
index c96a1afc95bd5..b8c6f4c717666 100644
--- a/drivers/phy/Makefile
+++ b/drivers/phy/Makefile
@@ -27,4 +27,5 @@ obj-y					+= broadcom/	\
 					   samsung/	\
 					   socionext/	\
 					   st/		\
-					   ti/
+					   baikal/	\
+					   ti/
\ No newline at end of file
diff --git a/drivers/phy/baikal/Kconfig b/drivers/phy/baikal/Kconfig
new file mode 100644
index 0000000000000..b8b598d6019e1
--- /dev/null
+++ b/drivers/phy/baikal/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config USB_PHY_BAIKAL
+	tristate "Baikal USB PHY driver"
+	depends on USB_SUPPORT
+	select GENERIC_PHY
+	select USB_PHY
+	help
+	  Enable this to support the USB PHY on Baikal SoCs.
+	  This driver controls both the USB2 PHY and the USB3 PHY.
\ No newline at end of file
diff --git a/drivers/phy/baikal/Makefile b/drivers/phy/baikal/Makefile
new file mode 100644
index 0000000000000..54fcd11988681
--- /dev/null
+++ b/drivers/phy/baikal/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_USB_PHY_BAIKAL)		+= baikal-usb-phy.o
diff --git a/drivers/phy/baikal/baikal-usb-phy.c b/drivers/phy/baikal/baikal-usb-phy.c
new file mode 100644
index 0000000000000..fefc307cfa07b
--- /dev/null
+++ b/drivers/phy/baikal/baikal-usb-phy.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Baikal USB PHY driver
+ *
+ * Copyright (C) 2022-2023 Baikal Electronics, JSC
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/iopoll.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <dt-bindings/phy/phy.h>
+
+struct phy_baikal_desc {
+	struct phy		*phy;
+	int			index;
+	bool			enable;
+};
+
+struct phy_baikal_priv {
+	struct phy_baikal_desc	**phys;
+	int			nphys;
+};
+
+static int phy_baikal_init(struct phy *phy)
+{
+	struct phy_baikal_desc *desc = phy_get_drvdata(phy);
+
+	if (desc->enable) {
+		return 0;
+	} else {
+		return -1;
+	}
+}
+
+static const struct phy_ops phy_baikal_ops = {
+	.init	= phy_baikal_init,
+	.owner	= THIS_MODULE,
+};
+
+static struct phy *phy_baikal_xlate(struct device *dev, struct of_phandle_args *args)
+{
+	int i;
+	struct phy_baikal_priv *priv = dev_get_drvdata(dev);
+
+	for (i = 0; i < priv->nphys; i++) {
+		if (priv->phys[i]->index == args->args[0])
+			break;
+	}
+
+	return priv->phys[i]->phy;
+}
+
+static int phy_baikal_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *child;
+	struct phy *phy;
+	struct phy_baikal_priv *priv;
+	struct phy_baikal_desc *phy_desc;
+	struct phy_provider *phy_provider;
+	int index;
+	int i = 0;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	priv->nphys = of_get_child_count(dev->of_node);
+	priv->phys = devm_kcalloc(dev, priv->nphys, sizeof(*priv->phys), GFP_KERNEL);
+
+	dev_set_drvdata(dev, priv);
+	for_each_available_child_of_node(dev->of_node, child) {
+		of_property_read_u32(child, "reg", &index);
+		phy = devm_phy_create(dev, NULL, &phy_baikal_ops);
+		phy_desc = devm_kzalloc(dev, sizeof(*phy_desc), GFP_KERNEL);
+		phy_desc->phy = phy;
+		phy_desc->index = index;
+		phy_desc->enable = of_property_read_bool(child, "enable");
+		priv->phys[i++] = phy_desc;
+		phy_set_drvdata(phy, phy_desc);
+	}
+
+	phy_provider = devm_of_phy_provider_register(dev, phy_baikal_xlate);
+	return PTR_ERR_OR_ZERO(phy_provider);
+}
+
+static const struct of_device_id phy_baikal_table[] = {
+	{ .compatible = "baikal,bm1000-usb-phy" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, phy_baikal_table);
+
+static struct platform_driver phy_baikal_driver = {
+	.probe = phy_baikal_probe,
+	.driver = {
+		.name = "baikal,bm1000-usb-phy",
+		.of_match_table = phy_baikal_table,
+	},
+};
+module_platform_driver(phy_baikal_driver);
+
+MODULE_DESCRIPTION("Baikal USB PHY driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 5bf7542087776..572aed1bc6d49 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -117,6 +117,9 @@ config SPI_AXI_SPI_ENGINE
 	  It is part of the SPI Engine framework that is used in some Analog Devices
 	  reference designs for FPGAs.
 
+config SPI_BAIKAL_ESPI
+	tristate "Baikal eSPI controller"
+
 config SPI_BCM2835
 	tristate "BCM2835 SPI controller"
 	depends on GPIOLIB
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index bb49c9e6d0a0c..b11b7e29a3385 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_SPI_AT91_USART)		+= spi-at91-usart.o
 obj-$(CONFIG_SPI_ATH79)			+= spi-ath79.o
 obj-$(CONFIG_SPI_AU1550)		+= spi-au1550.o
 obj-$(CONFIG_SPI_AXI_SPI_ENGINE)	+= spi-axi-spi-engine.o
+obj-$(CONFIG_SPI_BAIKAL_ESPI)		+= spi-baikal-espi.o
 obj-$(CONFIG_SPI_BCM2835)		+= spi-bcm2835.o
 obj-$(CONFIG_SPI_BCM2835AUX)		+= spi-bcm2835aux.o
 obj-$(CONFIG_SPI_BCM63XX)		+= spi-bcm63xx.o
diff --git a/drivers/spi/spi-baikal-espi.c b/drivers/spi/spi-baikal-espi.c
new file mode 100644
index 0000000000000..c8c40b0547844
--- /dev/null
+++ b/drivers/spi/spi-baikal-espi.c
@@ -0,0 +1,518 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Baikal Electronics eSPI controller driver
+ * Copyright (C) 2020-2022 Baikal Electronics, JSC
+ */
+
+#include <linux/acpi.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/highmem.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/spi/spi.h>
+#include <linux/gpio.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/of_platform.h>
+#include <linux/property.h>
+#include <linux/io.h>
+
+#define ESPI_FIFO_LEN	256
+#define ESPI_DUMMY_DATA	0xff
+
+struct espi_data {
+	struct spi_master	*master;
+	/* hardware */
+	int			irq;
+	void __iomem		*regs;
+	struct clk		*clk;
+	/* transfer */
+	uint32_t		rxlen;
+	uint32_t		txlen;
+	uint8_t			*tx;
+	uint8_t			*rx;
+};
+
+#define ESPI_CR1	0x00 /* control 1 */
+#define ESPI_CR2	0x04 /* control 2 */
+#define ESPI_TX_FIFO	0x0c /* tx fifo */
+#define ESPI_TX_FBCAR	0x14 /* tx byte count */
+#define ESPI_TX_FAETR	0x18 /* tx almost empty threshold */
+#define ESPI_RX_FIFO	0x1c /* rx fifo */
+#define ESPI_RX_FBCAR	0x24 /* rx byte count */
+#define ESPI_RX_FAFTR	0x28 /* rx almost full threshold */
+#define ESPI_ISR	0x30 /* irq status */
+#define ESPI_IMR	0x34 /* irq mask */
+#define ESPI_IVR	0x38 /* irq vector */
+
+typedef union {
+	uint32_t val;
+	struct {
+		uint32_t scr :1; /* reset */
+		uint32_t sce :1; /* enable */
+		uint32_t mss :1; /* select - master/slave */
+		uint32_t cph :1; /* clock phase */
+		uint32_t cpo :1; /* clock polarity */
+	} bits;
+} espi_cr1_t;
+
+#define ESPI_CR1_SCR_RESET		0
+#define ESPI_CR1_SCR_NORESET		1
+#define ESPI_CR1_SCE_CORE_DISABLE	0
+#define ESPI_CR1_SCE_CORE_ENABLE	1
+#define ESPI_CR1_MSS_MODE_MASTER	0
+#define ESPI_CR1_MSS_MODE_SLAVE		1
+#define ESPI_CR1_CPH_CLKPHASE_ODD	0
+#define ESPI_CR1_CPH_CLKPHASE_EVEN	1
+#define ESPI_CR1_CPO_CLKPOLAR_LOW	0
+#define ESPI_CR1_CPO_CLKPOLAR_HIGH	1
+
+typedef union {
+	uint32_t val;
+	struct {
+		uint32_t sso :3; /* slave select (ss0-ss7) */
+		uint32_t srd :1; /* read disable */
+		uint32_t sri :1; /* read first byte ignore */
+		uint32_t mlb :1; /* least sign git first */
+		uint32_t mte :1; /* master transfer enable */
+	} bits;
+} espi_cr2_t;
+
+#define ESPI_CR2_SRD_RX_DISABLE		0
+#define ESPI_CR2_SRD_RX_ENABLE		1
+#define ESPI_CR2_SRI_FIRST_RESIEV	0
+#define ESPI_CR2_SRI_FIRST_IGNORE	1
+#define ESPI_CR2_MLB_MSB		0
+#define ESPI_CR2_MLB_LSB		1
+#define ESPI_CR2_MTE_TX_DISABLE		0
+#define ESPI_CR2_MTE_TX_ENABLE		1 /* self clear */
+
+#define ESPI_RX_FAFTR_DISABLE		0xff
+#define ESPI_TX_FAETR_DISABLE		0x0
+#define ESPI_ISR_CLEAR_ALL		0xff /* clear by writing "1" */
+#define ESPI_IMR_DISABLE_ALL		0x0
+#define ESPI_IMR_ENABLE_ALL		0xff
+
+/* isr, imr, ivr */
+typedef union {
+	uint32_t val;
+	struct {
+		uint32_t tx_underrun	 :1;
+		uint32_t tx_overrun	 :1;
+		uint32_t rx_underrun	 :1;
+		uint32_t rx_overrun	 :1;
+		uint32_t tx_almost_empty :1;
+		uint32_t rx_almost_full	 :1;
+		uint32_t tx_done_master	 :1;
+		uint32_t tx_done_slave	 :1;
+		uint32_t alert		 :8;
+		uint32_t rx_crc_error	 :1;
+	} bits;
+} espi_irq_t;
+
+#define ESPI_IRQ_DISABLE	0
+#define ESPI_IRQ_ENABLE		1
+
+typedef union {
+	uint32_t val;
+	struct {
+		uint32_t ese :1; /* enable */
+		uint32_t rms :1; /* reset# direction */
+		uint32_t raa :1; /* reset# assertion active */
+		uint32_t cre :1; /* crc receive enable */
+		uint32_t crf :1; /* crc receive byte placed in fifo */
+		uint32_t wsr :1; /* wait state response */
+		uint32_t sdl :2; /* spi data lines (single, dual, quad) */
+	} bits;
+} espi_cr3_t;
+
+static inline uint32_t espi_readl(struct espi_data *priv, uint32_t offset)
+{
+	return __raw_readl(priv->regs + offset);
+}
+static inline void espi_writel(struct espi_data *priv, uint32_t offset, uint32_t val)
+{
+	__raw_writel(val, priv->regs + offset);
+}
+
+static void espi_set_cs	  (struct spi_device *spi, bool enable);
+static int  espi_setup	  (struct spi_device *spi);
+static void espi_writer	  (struct espi_data *priv);
+static void espi_reader	  (struct espi_data *priv);
+static void espi_reset	  (struct espi_data *priv);
+static void espi_enable_tx(struct espi_data *priv);
+static void espi_enable_rx(struct espi_data *priv);
+
+static void espi_reset(struct espi_data *priv)
+{
+	espi_cr1_t cr1;
+	cr1.val = espi_readl(priv, ESPI_CR1);
+	cr1.bits.scr = ESPI_CR1_SCR_RESET;
+	espi_writel(priv, ESPI_CR1, cr1.val);
+}
+
+static void espi_enable_rx(struct espi_data *priv)
+{
+	espi_cr2_t cr2;
+	cr2.val = espi_readl(priv, ESPI_CR2);
+	cr2.bits.srd = ESPI_CR2_SRD_RX_ENABLE;
+	espi_writel(priv, ESPI_CR2, cr2.val);
+}
+
+static void espi_enable_tx(struct espi_data *priv)
+{
+	espi_cr2_t cr2;
+	int cnt = espi_readl(priv, ESPI_TX_FBCAR);
+	cr2.val = espi_readl(priv, ESPI_CR2);
+
+	if (cnt && cr2.bits.mte != ESPI_CR2_MTE_TX_ENABLE) {
+		cr2.bits.mte = ESPI_CR2_MTE_TX_ENABLE;
+		espi_writel(priv, ESPI_CR2, cr2.val);
+	}
+}
+
+static void espi_set_cs(struct spi_device *spi, bool enable)
+{
+	struct espi_data *priv;
+	espi_cr2_t cr2;
+	pr_debug ("%s: enable = %d\n", __func__, enable);
+	priv = spi_master_get_devdata(spi->master);
+	cr2.val = espi_readl(priv, ESPI_CR2);
+	cr2.bits.sso = spi->chip_select;
+	espi_writel(priv, ESPI_CR2, cr2.val);
+}
+
+static void espi_writer(struct espi_data *priv)
+{
+	volatile uint8_t data = ESPI_DUMMY_DATA;
+	uint32_t free_tx = ESPI_FIFO_LEN - espi_readl(priv, ESPI_TX_FBCAR);
+	uint32_t free_rx = ESPI_FIFO_LEN - espi_readl(priv, ESPI_RX_FBCAR);
+	uint32_t cnt = min (min(free_tx, free_rx), priv->txlen);
+
+	if (!cnt) {
+		return;
+	}
+
+	priv->txlen -= cnt;
+
+	/* fifo */
+	while (cnt--) {
+		if (priv->tx) {
+			data = *priv->tx++;
+		} else {
+			data = ESPI_DUMMY_DATA;
+		}
+		espi_writel(priv, ESPI_TX_FIFO, data);
+	}
+	espi_enable_tx(priv);
+}
+
+static void espi_reader(struct espi_data *priv)
+{
+	volatile uint8_t data;
+	uint32_t have = espi_readl(priv, ESPI_RX_FBCAR);
+
+	/* fifo */
+	while (have--) {
+		data = espi_readl(priv, ESPI_RX_FIFO);
+		if (priv->rxlen) {
+			priv->rxlen--;
+			if (priv->rx) {
+				*priv->rx++ = data;
+			}
+		}
+	}
+}
+
+static irqreturn_t espi_irq_handler(int irq, void *dev)
+{
+	espi_irq_t status;
+	struct spi_master *master = dev;
+	struct espi_data *priv = spi_master_get_devdata(master);
+	int ret = -1;
+
+	status.val = espi_readl(priv, ESPI_ISR);
+	espi_writel(priv, ESPI_ISR, status.val);
+
+	if (!status.val) {
+		dev_err(&master->dev, "irq_none\n");
+		return IRQ_NONE;
+	}
+
+	/* errors */
+	if (status.bits.tx_underrun) {
+		dev_err(&master->dev, "tx_underrun\n");
+		goto exit;
+	}
+	if (status.bits.tx_overrun) {
+		dev_err(&master->dev, "tx_overrun\n");
+		goto exit;
+	}
+	if (status.bits.rx_underrun) {
+		dev_err(&master->dev, "rx_underrun\n");
+		goto exit;
+	}
+	if (status.bits.rx_overrun) {
+		dev_err(&master->dev, "rx_overrun\n");
+		goto exit;
+	}
+	if (status.bits.rx_crc_error) {
+		dev_err(&master->dev, "rx_crc_error\n");
+		goto exit;
+	}
+	if (status.bits.alert) {
+		dev_warn(&master->dev, "alert\n");
+	}
+
+	/* transfer */
+	espi_reader(priv);
+	espi_writer(priv);
+	ret = 0;
+
+exit:
+	if (ret) {
+		master->cur_msg->status = -EIO;
+		priv->txlen = 0;
+		priv->rxlen = 0;
+		espi_reset(priv);
+	}
+
+	/* tx done */
+	if (priv->txlen == 0) {
+		espi_irq_t mask;
+		mask.val = espi_readl(priv, ESPI_IMR);
+		mask.bits.tx_almost_empty = ESPI_IRQ_DISABLE;
+		espi_writel(priv, ESPI_IMR, mask.val);
+	}
+
+	/* done */
+	if ((priv->txlen == 0) && (priv->rxlen == 0)) {
+		espi_writel(priv, ESPI_IMR, ESPI_IMR_DISABLE_ALL);
+		espi_writel(priv, ESPI_ISR, ESPI_ISR_CLEAR_ALL);
+		spi_finalize_current_transfer(master);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int espi_transfer_one(struct spi_master *master,
+			     struct spi_device *spi,
+			     struct spi_transfer *transfer)
+{
+	struct espi_data *priv = spi_master_get_devdata(spi->master);
+
+	priv->tx = (void*)transfer->tx_buf;
+	priv->rx = (void*)transfer->rx_buf;
+	priv->txlen = transfer->len;
+	priv->rxlen = transfer->len;
+
+	/* clear */
+	espi_writel(priv, ESPI_IMR, ESPI_IMR_DISABLE_ALL);
+	espi_writel(priv, ESPI_ISR, ESPI_ISR_CLEAR_ALL);
+
+	/* clk */
+	if (transfer->speed_hz && (transfer->speed_hz != spi->max_speed_hz)) {
+		clk_set_rate(priv->clk, transfer->speed_hz);
+		spi->max_speed_hz = clk_get_rate(priv->clk);
+		pr_debug ("%s: new speed %u\n", __func__, spi->max_speed_hz);
+	}
+
+	/* transfer */
+	espi_writer(priv);
+	espi_writel(priv, ESPI_IMR, ESPI_IMR_ENABLE_ALL);
+
+#ifdef DEBUG
+	/* wait */
+	uint64_t ms;
+	ms = 8LL * 1000LL * transfer->len;
+	do_div(ms, transfer->speed_hz);
+	ms += ms + 200; /* some tolerance */
+	ms = wait_for_completion_timeout(&master->xfer_completion, msecs_to_jiffies(ms));
+	if (ms == 0) {
+		dev_err(&spi->dev, "SPI transfer timed out\n");
+		return -ETIMEDOUT;
+	}
+	/* print */
+	if (transfer->tx_buf) {
+		print_hex_dump_bytes("tx > ", DUMP_PREFIX_NONE, transfer->tx_buf, transfer->len);
+	} else {
+		print_hex_dump_bytes("rx < ", DUMP_PREFIX_NONE, transfer->rx_buf, transfer->len);
+	}
+	return 0;
+#else
+	return 1;
+#endif
+
+}
+
+static int espi_setup(struct spi_device *spi)
+{
+	struct espi_data *priv = spi_master_get_devdata(spi->master);
+	espi_cr1_t cr1;
+	espi_cr2_t cr2;
+
+	espi_reset(priv);
+
+	/* irq */
+	espi_writel(priv, ESPI_IMR, ESPI_IMR_DISABLE_ALL);
+	espi_writel(priv, ESPI_ISR, ESPI_ISR_CLEAR_ALL);
+
+	/* control 1 */
+	cr1.val = espi_readl(priv, ESPI_CR1);
+	cr1.bits.scr = ESPI_CR1_SCR_NORESET;
+	cr1.bits.sce = ESPI_CR1_SCE_CORE_ENABLE;
+	cr1.bits.mss = ESPI_CR1_MSS_MODE_MASTER;
+	cr1.bits.cph = spi->mode & SPI_CPHA ? ESPI_CR1_CPH_CLKPHASE_EVEN :
+					      ESPI_CR1_CPH_CLKPHASE_ODD;
+	cr1.bits.cpo = spi->mode & SPI_CPOL ? ESPI_CR1_CPO_CLKPOLAR_HIGH :
+					      ESPI_CR1_CPO_CLKPOLAR_LOW;
+	espi_writel(priv, ESPI_CR1, cr1.val);
+
+	/* control 2 */
+	cr2.val = espi_readl(priv, ESPI_CR2);
+	cr2.bits.srd = ESPI_CR2_SRD_RX_DISABLE;
+	cr2.bits.mte = ESPI_CR2_MTE_TX_DISABLE;
+	cr2.bits.sri = ESPI_CR2_SRI_FIRST_RESIEV;
+	cr2.bits.mlb = spi->mode & SPI_LSB_FIRST ? ESPI_CR2_MLB_LSB :
+						   ESPI_CR2_MLB_MSB;
+	espi_writel(priv, ESPI_CR2, cr2.val);
+
+	/* threshold */
+	espi_writel(priv, ESPI_TX_FAETR, ESPI_FIFO_LEN / 2);
+	espi_writel(priv, ESPI_RX_FAFTR, ESPI_FIFO_LEN / 2);
+
+	/* full duplex */
+	espi_enable_rx(priv);
+
+	/* clk */
+	spi->max_speed_hz = clk_get_rate(priv->clk);
+	pr_debug ("%s: clk = %u\n", __func__, spi->max_speed_hz);
+
+	return 0;
+}
+
+static int espi_probe(struct platform_device *pdev)
+{
+	struct device *dev = &(pdev->dev);
+	struct device_node *np = pdev->dev.of_node;
+	struct espi_data *priv;
+	struct resource *mem;
+	int err;
+	struct spi_master *master;
+
+	/* init */
+	priv = devm_kzalloc(dev, sizeof(struct espi_data), GFP_KERNEL);
+	if (!priv) {
+		dev_err(dev, "no memory\n");
+		return -ENOMEM;
+	}
+	platform_set_drvdata(pdev, priv);
+
+	/* reg */
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!mem) {
+		return -EINVAL;
+	}
+
+	priv->regs = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(priv->regs)) {
+		dev_err(dev, "region map failed\n");
+		return PTR_ERR(priv->regs);
+	}
+
+	/* irq */
+	priv->irq = platform_get_irq(pdev, 0);
+	if (priv->irq < 0) {
+		dev_err(dev, "no irq resource\n");
+		return priv->irq;
+	}
+
+	/* clk */
+	priv->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(priv->clk)) {
+		dev_err(dev, "could not get spi clock\n");
+		return PTR_ERR(priv->clk);
+	}
+	err = clk_prepare_enable(priv->clk);
+	if (err) {
+		dev_err(dev, "could not prepare spi clock\n");
+		return err;
+	}
+
+	/* master */
+	master = spi_alloc_master(dev, 0);
+	if (!master) {
+		return -ENOMEM;
+	}
+	priv->master = master;
+	master->num_chipselect = 8;
+	master->use_gpio_descriptors = true;
+	master->mode_bits = SPI_CPHA | SPI_CPOL;
+	master->dev.of_node = dev->of_node;
+	master->dev.fwnode = dev->fwnode;
+	master->transfer_one = espi_transfer_one;
+	master->set_cs = espi_set_cs;
+	master->setup = espi_setup;
+	master->bus_num = of_alias_get_id(master->dev.of_node, "spi");
+	master->flags = SPI_MASTER_GPIO_SS;
+
+	err = devm_request_irq(dev, priv->irq, espi_irq_handler,
+		IRQF_SHARED, pdev->name, master);
+	if (err) {
+		dev_err(dev, "unable to request irq %d\n", priv->irq);
+		return err;
+	}
+
+	err = of_property_read_u32(np, "clock-frequency", &master->max_speed_hz);
+	if (err == 0) {
+		clk_set_rate(priv->clk, master->max_speed_hz);
+	}
+	master->max_speed_hz = clk_get_rate(priv->clk);
+
+	spi_master_set_devdata(master, priv);
+
+	err = devm_spi_register_master(dev, master);
+	if (err) {
+		dev_err(dev, "problem registering spi master\n");
+		spi_master_put(master);
+		return err;
+	}
+
+	dev_info (dev, "base 0x%lx, irq %d, bus %d, speed %d\n",
+		(uintptr_t)priv->regs, priv->irq, master->bus_num, master->max_speed_hz);
+
+	return 0;
+}
+
+static int espi_remove(struct platform_device *pdev)
+{
+	struct espi_data *priv = platform_get_drvdata(pdev);
+	espi_reset(priv);
+	clk_disable_unprepare(priv->clk);
+	return 0;
+}
+
+static const struct of_device_id baikal_espi_of_match[] = {
+	{ .compatible = "baikal,bm1000-espi" },
+	{ .compatible = "baikal,bs1000-espi" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, baikal_espi_of_match);
+
+static struct platform_driver baikal_espi_driver = {
+	.probe		= espi_probe,
+	.remove		= espi_remove,
+	.driver		= {
+		.name	= "baikal-espi",
+		.of_match_table = baikal_espi_of_match
+	}
+};
+module_platform_driver(baikal_espi_driver);
+
+MODULE_DESCRIPTION("Baikal eSPI controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c
index c2f96941ad04f..29fe2bfc024f5 100644
--- a/drivers/spi/spi-dw.c
+++ b/drivers/spi/spi-dw.c
@@ -196,6 +196,7 @@ static void dw_writer(struct dw_spi *dws)
 		dw_write_io_reg(dws, DW_SPI_DR, txw);
 		dws->tx += dws->n_bytes;
 	}
+	dw_writel(dws, DW_SPI_SER, 1);
 	spin_unlock(&dws->buf_lock);
 }
 
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index 2d5a039229acf..eaa39af7bb798 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -275,6 +275,16 @@ dw8250_do_pm(struct uart_port *port, unsigned int state, unsigned int old)
 		pm_runtime_put_sync_suspend(port->dev);
 }
 
+static void dw8250_do_set_termios(struct uart_port *p, struct ktermios *termios,
+				  struct ktermios *old)
+{
+	p->status &= ~UPSTAT_AUTOCTS;
+	if (termios->c_cflag & CRTSCTS)
+		p->status |= UPSTAT_AUTOCTS;
+
+	serial8250_do_set_termios(p, termios, old);
+}
+
 static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
 			       struct ktermios *old)
 {
@@ -300,11 +310,7 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
 		p->uartclk = rate;
 
 out:
-	p->status &= ~UPSTAT_AUTOCTS;
-	if (termios->c_cflag & CRTSCTS)
-		p->status |= UPSTAT_AUTOCTS;
-
-	serial8250_do_set_termios(p, termios, old);
+	dw8250_do_set_termios(p, termios, old);
 }
 
 static void dw8250_set_ldisc(struct uart_port *p, struct ktermios *termios)
@@ -368,6 +374,8 @@ static void dw8250_quirks(struct uart_port *p, struct dw8250_data *data)
 		}
 		if (of_device_is_compatible(np, "marvell,armada-38x-uart"))
 			p->serial_out = dw8250_serial_out38x;
+		if (of_device_is_compatible(np, "baikal,bm1000-uart"))
+			p->set_termios = dw8250_do_set_termios;
 
 	} else if (acpi_dev_present("APMC0D08", NULL, -1)) {
 		p->iotype = UPIO_MEM32;
@@ -635,6 +643,7 @@ static const struct dev_pm_ops dw8250_pm_ops = {
 
 static const struct of_device_id dw8250_of_match[] = {
 	{ .compatible = "snps,dw-apb-uart" },
+	{ .compatible = "baikal,bm1000-uart" },
 	{ .compatible = "cavium,octeon-3860-uart" },
 	{ .compatible = "marvell,armada-38x-uart" },
 	{ .compatible = "renesas,rzn1-uart" },
diff --git a/drivers/usb/dwc3/Kconfig b/drivers/usb/dwc3/Kconfig
index 556a876c78962..c7b783f338a95 100644
--- a/drivers/usb/dwc3/Kconfig
+++ b/drivers/usb/dwc3/Kconfig
@@ -138,4 +138,13 @@ config USB_DWC3_QCOM
 	  for peripheral mode support.
 	  Say 'Y' or 'M' if you have one such device.
 
+config USB_DWC3_BAIKAL
+	tristate "Baikal Electronics Platforms"
+	depends on OF
+	default USB_DWC3
+	help
+	  Baikal Electronics SoCs with one DesignWare Core USB3 IP
+	  inside.
+	  Say 'Y' or 'M' if you have one such device.
+
 endif
diff --git a/drivers/usb/dwc3/Makefile b/drivers/usb/dwc3/Makefile
index ae86da0dc5bd1..0dcaf92a43ecc 100644
--- a/drivers/usb/dwc3/Makefile
+++ b/drivers/usb/dwc3/Makefile
@@ -51,3 +51,4 @@ obj-$(CONFIG_USB_DWC3_MESON_G12A)	+= dwc3-meson-g12a.o
 obj-$(CONFIG_USB_DWC3_OF_SIMPLE)	+= dwc3-of-simple.o
 obj-$(CONFIG_USB_DWC3_ST)		+= dwc3-st.o
 obj-$(CONFIG_USB_DWC3_QCOM)		+= dwc3-qcom.o
+obj-$(CONFIG_USB_DWC3_BAIKAL)		+= dwc3-baikal.o
diff --git a/drivers/usb/dwc3/dwc3-baikal.c b/drivers/usb/dwc3/dwc3-baikal.c
new file mode 100644
index 0000000000000..8b75e73f52b3c
--- /dev/null
+++ b/drivers/usb/dwc3/dwc3-baikal.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dwc3-baikal.c - Baikal Electronics SoCs Specific Glue layer
+ *
+ * Copyright (C) 2015-2022 Baikal Electronics, JSC
+ * Author: Dmitry Dunaev <dmitry.dunaev@baikalelectronics.ru>
+ */
+
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/usb/usb_phy_generic.h>
+#include <linux/io.h>
+#include <linux/of_platform.h>
+
+struct dwc3_baikal {
+	struct device	*dev;
+	struct clk	*clk;
+};
+
+static int dwc3_baikal_probe(struct platform_device *pdev)
+{
+	struct device		*dev = &pdev->dev;
+	struct device_node	*node = pdev->dev.of_node;
+	struct dwc3_baikal	*dwc;
+	int			ret;
+
+	dwc = devm_kzalloc(dev, sizeof(*dwc), GFP_KERNEL);
+	if (!dwc)
+		return -ENOMEM;
+
+	ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64));
+	if (ret) {
+		dev_err(dev, "DMA mask error %d\n", ret);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, dwc);
+	dwc->dev = dev;
+
+	dwc->clk = devm_clk_get(dwc->dev, "usb");
+	if (IS_ERR(dwc->clk)) {
+		dev_err(dev, "no interface clk specified\n");
+		return -EINVAL;
+	}
+
+	ret = clk_prepare_enable(dwc->clk);
+	if (ret < 0) {
+		dev_err(dwc->dev, "unable to enable usb clock\n");
+		return ret;
+	}
+
+	if (node) {
+		ret = of_platform_populate(node, NULL, NULL, dev);
+		if (ret) {
+			dev_err(&pdev->dev, "failed to create dwc3 core\n");
+			goto __error;
+		}
+	} else {
+		dev_err(dev, "no device node, failed to add dwc3 core\n");
+		ret = -ENODEV;
+		goto __error;
+	}
+
+	return 0;
+
+__error:
+	clk_disable_unprepare(dwc->clk);
+	return ret;
+}
+
+static int dwc3_baikal_remove_core(struct device *dev, void *c)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+
+	platform_device_unregister(pdev);
+	return 0;
+}
+
+static int dwc3_baikal_remove(struct platform_device *pdev)
+{
+	struct dwc3_baikal *dwc = platform_get_drvdata(pdev);
+
+	device_for_each_child(&pdev->dev, NULL, dwc3_baikal_remove_core);
+	clk_disable_unprepare(dwc->clk);
+	platform_set_drvdata(pdev, NULL);
+	return 0;
+}
+
+static const struct of_device_id dwc3_baikal_of_match[] = {
+	{ .compatible = "baikal,bm1000-dwc3" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, dwc3_baikal_of_match);
+
+static struct platform_driver dwc3_baikal_driver = {
+	.probe		= dwc3_baikal_probe,
+	.remove		= dwc3_baikal_remove,
+	.driver		= {
+		.name	= "baikal-dwc3",
+		.of_match_table	= dwc3_baikal_of_match
+	}
+};
+module_platform_driver(dwc3_baikal_driver);
+
+MODULE_ALIAS("platform:baikal-dwc3");
+MODULE_AUTHOR("Dmitry Dunaev <dmitry.dunaev@baikalelectronics.ru>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("DesignWare USB3 Baikal Glue Layer");
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 427a993c7f576..c844d513537e2 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -19,6 +19,8 @@ source "drivers/gpu/ipu-v3/Kconfig"
 
 source "drivers/gpu/drm/Kconfig"
 
+source "drivers/gpu/arm/Kconfig"
+
 menu "Frame buffer Devices"
 source "drivers/video/fbdev/Kconfig"
 endmenu
diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c
index fef7c61f5555a..cd578843277e5 100644
--- a/drivers/watchdog/dw_wdt.c
+++ b/drivers/watchdog/dw_wdt.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright 2010-2011 Picochip Ltd., Jamie Iles
- * http://www.picochip.com
+ * https://www.picochip.com
  *
  * This file implements a driver for the Synopsys DesignWare watchdog device
  * in the many subsystems. The watchdog has 16 different timeout periods
@@ -14,15 +14,18 @@
 
 #include <linux/bitops.h>
 #include <linux/clk.h>
+#include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/err.h>
+#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/limits.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/of.h>
-#include <linux/pm.h>
 #include <linux/platform_device.h>
+#include <linux/pm.h>
 #include <linux/reset.h>
 #include <linux/watchdog.h>
 
@@ -34,26 +37,64 @@
 #define WDOG_CURRENT_COUNT_REG_OFFSET	    0x08
 #define WDOG_COUNTER_RESTART_REG_OFFSET     0x0c
 #define WDOG_COUNTER_RESTART_KICK_VALUE	    0x76
-
-/* The maximum TOP (timeout period) value that can be set in the watchdog. */
-#define DW_WDT_MAX_TOP		15
+#define WDOG_INTERRUPT_STATUS_REG_OFFSET    0x10
+#define WDOG_INTERRUPT_CLEAR_REG_OFFSET     0x14
+#define WDOG_COMP_PARAMS_5_REG_OFFSET       0xe4
+#define WDOG_COMP_PARAMS_4_REG_OFFSET       0xe8
+#define WDOG_COMP_PARAMS_3_REG_OFFSET       0xec
+#define WDOG_COMP_PARAMS_2_REG_OFFSET       0xf0
+#define WDOG_COMP_PARAMS_1_REG_OFFSET       0xf4
+#define WDOG_COMP_PARAMS_1_USE_FIX_TOP      BIT(6)
+#define WDOG_COMP_VERSION_REG_OFFSET        0xf8
+#define WDOG_COMP_TYPE_REG_OFFSET           0xfc
+
+/* There are sixteen TOPs (timeout periods) that can be set in the watchdog. */
+#define DW_WDT_NUM_TOPS		16
+#define DW_WDT_FIX_TOP(_idx)	(1U << (16 + _idx))
 
 #define DW_WDT_DEFAULT_SECONDS	30
 
+static const u32 dw_wdt_fix_tops[DW_WDT_NUM_TOPS] = {
+	DW_WDT_FIX_TOP(0), DW_WDT_FIX_TOP(1), DW_WDT_FIX_TOP(2),
+	DW_WDT_FIX_TOP(3), DW_WDT_FIX_TOP(4), DW_WDT_FIX_TOP(5),
+	DW_WDT_FIX_TOP(6), DW_WDT_FIX_TOP(7), DW_WDT_FIX_TOP(8),
+	DW_WDT_FIX_TOP(9), DW_WDT_FIX_TOP(10), DW_WDT_FIX_TOP(11),
+	DW_WDT_FIX_TOP(12), DW_WDT_FIX_TOP(13), DW_WDT_FIX_TOP(14),
+	DW_WDT_FIX_TOP(15)
+};
+
 static bool nowayout = WATCHDOG_NOWAYOUT;
 module_param(nowayout, bool, 0);
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
 		 "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
 
+enum dw_wdt_rmod {
+	DW_WDT_RMOD_RESET = 1,
+	DW_WDT_RMOD_IRQ = 2
+};
+
+struct dw_wdt_timeout {
+	u32 top_val;
+	unsigned int sec;
+	unsigned int msec;
+};
+
 struct dw_wdt {
 	void __iomem		*regs;
 	struct clk		*clk;
+	struct clk		*pclk;
 	unsigned long		rate;
+	enum dw_wdt_rmod	rmod;
+	struct dw_wdt_timeout	timeouts[DW_WDT_NUM_TOPS];
 	struct watchdog_device	wdd;
 	struct reset_control	*rst;
 	/* Save/restore */
 	u32			control;
 	u32			timeout;
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry		*dbgfs_dir;
+#endif
 };
 
 #define to_dw_wdt(wdd)	container_of(wdd, struct dw_wdt, wdd)
@@ -64,20 +105,84 @@ static inline int dw_wdt_is_enabled(struct dw_wdt *dw_wdt)
 		WDOG_CONTROL_REG_WDT_EN_MASK;
 }
 
-static inline int dw_wdt_top_in_seconds(struct dw_wdt *dw_wdt, unsigned top)
+static void dw_wdt_update_mode(struct dw_wdt *dw_wdt, enum dw_wdt_rmod rmod)
+{
+	u32 val;
+
+	val = readl(dw_wdt->regs + WDOG_CONTROL_REG_OFFSET);
+	if (rmod == DW_WDT_RMOD_IRQ)
+		val |= WDOG_CONTROL_REG_RESP_MODE_MASK;
+	else
+		val &= ~WDOG_CONTROL_REG_RESP_MODE_MASK;
+	writel(val, dw_wdt->regs + WDOG_CONTROL_REG_OFFSET);
+
+	dw_wdt->rmod = rmod;
+}
+
+static unsigned int dw_wdt_find_best_top(struct dw_wdt *dw_wdt,
+					 unsigned int timeout, u32 *top_val)
+{
+	int idx;
+
+	/*
+	 * Find a TOP with timeout greater or equal to the requested number.
+	 * Note we'll select a TOP with maximum timeout if the requested
+	 * timeout couldn't be reached.
+	 */
+	for (idx = 0; idx < DW_WDT_NUM_TOPS; ++idx) {
+		if (dw_wdt->timeouts[idx].sec >= timeout)
+			break;
+	}
+
+	if (idx == DW_WDT_NUM_TOPS)
+		--idx;
+
+	*top_val = dw_wdt->timeouts[idx].top_val;
+
+	return dw_wdt->timeouts[idx].sec;
+}
+
+static unsigned int dw_wdt_get_min_timeout(struct dw_wdt *dw_wdt)
 {
+	int idx;
+
 	/*
-	 * There are 16 possible timeout values in 0..15 where the number of
-	 * cycles is 2 ^ (16 + i) and the watchdog counts down.
+	 * We'll find a timeout greater or equal to one second anyway because
+	 * the driver probe would have failed if there was none.
 	 */
-	return (1U << (16 + top)) / dw_wdt->rate;
+	for (idx = 0; idx < DW_WDT_NUM_TOPS; ++idx) {
+		if (dw_wdt->timeouts[idx].sec)
+			break;
+	}
+
+	return dw_wdt->timeouts[idx].sec;
 }
 
-static int dw_wdt_get_top(struct dw_wdt *dw_wdt)
+static unsigned int dw_wdt_get_max_timeout_ms(struct dw_wdt *dw_wdt)
 {
-	int top = readl(dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET) & 0xF;
+	struct dw_wdt_timeout *timeout = &dw_wdt->timeouts[DW_WDT_NUM_TOPS - 1];
+	u64 msec;
+
+	msec = (u64)timeout->sec * MSEC_PER_SEC + timeout->msec;
 
-	return dw_wdt_top_in_seconds(dw_wdt, top);
+	return msec < UINT_MAX ? msec : UINT_MAX;
+}
+
+static unsigned int dw_wdt_get_timeout(struct dw_wdt *dw_wdt)
+{
+	int top_val = readl(dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET) & 0xF;
+	int idx;
+
+	for (idx = 0; idx < DW_WDT_NUM_TOPS; ++idx) {
+		if (dw_wdt->timeouts[idx].top_val == top_val)
+			break;
+	}
+
+	/*
+	 * In IRQ mode due to the two stages counter, the actual timeout is
+	 * twice greater than the TOP setting.
+	 */
+	return dw_wdt->timeouts[idx].sec * dw_wdt->rmod;
 }
 
 static int dw_wdt_ping(struct watchdog_device *wdd)
@@ -93,17 +198,23 @@ static int dw_wdt_ping(struct watchdog_device *wdd)
 static int dw_wdt_set_timeout(struct watchdog_device *wdd, unsigned int top_s)
 {
 	struct dw_wdt *dw_wdt = to_dw_wdt(wdd);
-	int i, top_val = DW_WDT_MAX_TOP;
+	unsigned int timeout;
+	u32 top_val;
 
 	/*
-	 * Iterate over the timeout values until we find the closest match. We
-	 * always look for >=.
+	 * Note IRQ mode being enabled means having a non-zero pre-timeout
+	 * setup. In this case we try to find a TOP as close to the half of the
+	 * requested timeout as possible since DW Watchdog IRQ mode is designed
+	 * in two stages way - first timeout rises the pre-timeout interrupt,
+	 * second timeout performs the system reset. So basically the effective
+	 * watchdog-caused reset happens after two watchdog TOPs elapsed.
 	 */
-	for (i = 0; i <= DW_WDT_MAX_TOP; ++i)
-		if (dw_wdt_top_in_seconds(dw_wdt, i) >= top_s) {
-			top_val = i;
-			break;
-		}
+	timeout = dw_wdt_find_best_top(dw_wdt, DIV_ROUND_UP(top_s, dw_wdt->rmod),
+				       &top_val);
+	if (dw_wdt->rmod == DW_WDT_RMOD_IRQ)
+		wdd->pretimeout = timeout;
+	else
+		wdd->pretimeout = 0;
 
 	/*
 	 * Set the new value in the watchdog.  Some versions of dw_wdt
@@ -114,7 +225,34 @@ static int dw_wdt_set_timeout(struct watchdog_device *wdd, unsigned int top_s)
 	writel(top_val | top_val << WDOG_TIMEOUT_RANGE_TOPINIT_SHIFT,
 	       dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
 
-	wdd->timeout = dw_wdt_top_in_seconds(dw_wdt, top_val);
+	/* Kick new TOP value into the watchdog counter if activated. */
+	if (watchdog_active(wdd))
+		dw_wdt_ping(wdd);
+
+	/*
+	 * In case users set bigger timeout value than HW can support,
+	 * kernel(watchdog_dev.c) helps to feed watchdog before
+	 * wdd->max_hw_heartbeat_ms
+	 */
+	if (top_s * 1000 <= wdd->max_hw_heartbeat_ms)
+		wdd->timeout = timeout * dw_wdt->rmod;
+	else
+		wdd->timeout = top_s;
+
+	return 0;
+}
+
+static int dw_wdt_set_pretimeout(struct watchdog_device *wdd, unsigned int req)
+{
+	struct dw_wdt *dw_wdt = to_dw_wdt(wdd);
+
+	/*
+	 * We ignore actual value of the timeout passed from user-space
+	 * using it as a flag whether the pretimeout functionality is intended
+	 * to be activated.
+	 */
+	dw_wdt_update_mode(dw_wdt, req ? DW_WDT_RMOD_IRQ : DW_WDT_RMOD_RESET);
+	dw_wdt_set_timeout(wdd, wdd->timeout);
 
 	return 0;
 }
@@ -123,8 +261,11 @@ static void dw_wdt_arm_system_reset(struct dw_wdt *dw_wdt)
 {
 	u32 val = readl(dw_wdt->regs + WDOG_CONTROL_REG_OFFSET);
 
-	/* Disable interrupt mode; always perform system reset. */
-	val &= ~WDOG_CONTROL_REG_RESP_MODE_MASK;
+	/* Disable/enable interrupt mode depending on the RMOD flag. */
+	if (dw_wdt->rmod == DW_WDT_RMOD_IRQ)
+		val |= WDOG_CONTROL_REG_RESP_MODE_MASK;
+	else
+		val &= ~WDOG_CONTROL_REG_RESP_MODE_MASK;
 	/* Enable watchdog. */
 	val |= WDOG_CONTROL_REG_WDT_EN_MASK;
 	writel(val, dw_wdt->regs + WDOG_CONTROL_REG_OFFSET);
@@ -135,6 +276,7 @@ static int dw_wdt_start(struct watchdog_device *wdd)
 	struct dw_wdt *dw_wdt = to_dw_wdt(wdd);
 
 	dw_wdt_set_timeout(wdd, wdd->timeout);
+	dw_wdt_ping(&dw_wdt->wdd);
 	dw_wdt_arm_system_reset(dw_wdt);
 
 	return 0;
@@ -161,6 +303,7 @@ static int dw_wdt_restart(struct watchdog_device *wdd,
 	struct dw_wdt *dw_wdt = to_dw_wdt(wdd);
 
 	writel(0, dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
+	dw_wdt_update_mode(dw_wdt, DW_WDT_RMOD_RESET);
 	if (dw_wdt_is_enabled(dw_wdt))
 		writel(WDOG_COUNTER_RESTART_KICK_VALUE,
 		       dw_wdt->regs + WDOG_COUNTER_RESTART_REG_OFFSET);
@@ -176,9 +319,19 @@ static int dw_wdt_restart(struct watchdog_device *wdd,
 static unsigned int dw_wdt_get_timeleft(struct watchdog_device *wdd)
 {
 	struct dw_wdt *dw_wdt = to_dw_wdt(wdd);
+	unsigned int sec;
+	u32 val;
 
-	return readl(dw_wdt->regs + WDOG_CURRENT_COUNT_REG_OFFSET) /
-		dw_wdt->rate;
+	val = readl(dw_wdt->regs + WDOG_CURRENT_COUNT_REG_OFFSET);
+	sec = val / dw_wdt->rate;
+
+	if (dw_wdt->rmod == DW_WDT_RMOD_IRQ) {
+		val = readl(dw_wdt->regs + WDOG_INTERRUPT_STATUS_REG_OFFSET);
+		if (!val)
+			sec += wdd->pretimeout;
+	}
+
+	return sec;
 }
 
 static const struct watchdog_info dw_wdt_ident = {
@@ -187,16 +340,41 @@ static const struct watchdog_info dw_wdt_ident = {
 	.identity	= "Synopsys DesignWare Watchdog",
 };
 
+static const struct watchdog_info dw_wdt_pt_ident = {
+	.options	= WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT |
+			  WDIOF_PRETIMEOUT | WDIOF_MAGICCLOSE,
+	.identity	= "Synopsys DesignWare Watchdog",
+};
+
 static const struct watchdog_ops dw_wdt_ops = {
 	.owner		= THIS_MODULE,
 	.start		= dw_wdt_start,
 	.stop		= dw_wdt_stop,
 	.ping		= dw_wdt_ping,
 	.set_timeout	= dw_wdt_set_timeout,
+	.set_pretimeout	= dw_wdt_set_pretimeout,
 	.get_timeleft	= dw_wdt_get_timeleft,
 	.restart	= dw_wdt_restart,
 };
 
+static irqreturn_t dw_wdt_irq(int irq, void *devid)
+{
+	struct dw_wdt *dw_wdt = devid;
+	u32 val;
+
+	/*
+	 * We don't clear the IRQ status. It's supposed to be done by the
+	 * following ping operations.
+	 */
+	val = readl(dw_wdt->regs + WDOG_INTERRUPT_STATUS_REG_OFFSET);
+	if (!val)
+		return IRQ_NONE;
+
+	watchdog_notify_pretimeout(&dw_wdt->wdd);
+
+	return IRQ_HANDLED;
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int dw_wdt_suspend(struct device *dev)
 {
@@ -205,6 +383,7 @@ static int dw_wdt_suspend(struct device *dev)
 	dw_wdt->control = readl(dw_wdt->regs + WDOG_CONTROL_REG_OFFSET);
 	dw_wdt->timeout = readl(dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
 
+	clk_disable_unprepare(dw_wdt->pclk);
 	clk_disable_unprepare(dw_wdt->clk);
 
 	return 0;
@@ -218,6 +397,12 @@ static int dw_wdt_resume(struct device *dev)
 	if (err)
 		return err;
 
+	err = clk_prepare_enable(dw_wdt->pclk);
+	if (err) {
+		clk_disable_unprepare(dw_wdt->clk);
+		return err;
+	}
+
 	writel(dw_wdt->timeout, dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
 	writel(dw_wdt->control, dw_wdt->regs + WDOG_CONTROL_REG_OFFSET);
 
@@ -229,6 +414,139 @@ static int dw_wdt_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(dw_wdt_pm_ops, dw_wdt_suspend, dw_wdt_resume);
 
+/*
+ * In case if DW WDT IP core is synthesized with fixed TOP feature disabled the
+ * TOPs array can be arbitrary ordered with nearly any sixteen uint numbers
+ * depending on the system engineer imagination. The next method handles the
+ * passed TOPs array to pre-calculate the effective timeouts and to sort the
+ * TOP items out in the ascending order with respect to the timeouts.
+ */
+
+static void dw_wdt_handle_tops(struct dw_wdt *dw_wdt, const u32 *tops)
+{
+	struct dw_wdt_timeout tout, *dst;
+	int val, tidx;
+	u64 msec;
+
+	/*
+	 * We walk over the passed TOPs array and calculate corresponding
+	 * timeouts in seconds and milliseconds. The milliseconds granularity
+	 * is needed to distinguish the TOPs with very close timeouts and to
+	 * set the watchdog max heartbeat setting further.
+	 */
+	for (val = 0; val < DW_WDT_NUM_TOPS; ++val) {
+		tout.top_val = val;
+		tout.sec = tops[val] / dw_wdt->rate;
+		msec = (u64)tops[val] * MSEC_PER_SEC;
+		do_div(msec, dw_wdt->rate);
+		tout.msec = msec - ((u64)tout.sec * MSEC_PER_SEC);
+
+		/*
+		 * Find a suitable place for the current TOP in the timeouts
+		 * array so that the list is remained in the ascending order.
+		 */
+		for (tidx = 0; tidx < val; ++tidx) {
+			dst = &dw_wdt->timeouts[tidx];
+			if (tout.sec > dst->sec || (tout.sec == dst->sec &&
+			    tout.msec >= dst->msec))
+				continue;
+			else
+				swap(*dst, tout);
+		}
+
+		dw_wdt->timeouts[val] = tout;
+	}
+}
+
+static int dw_wdt_init_timeouts(struct dw_wdt *dw_wdt, struct device *dev)
+{
+	u32 data, of_tops[DW_WDT_NUM_TOPS];
+	const u32 *tops;
+	int ret;
+
+	/*
+	 * Retrieve custom or fixed counter values depending on the
+	 * WDT_USE_FIX_TOP flag found in the component specific parameters
+	 * #1 register.
+	 */
+	data = readl(dw_wdt->regs + WDOG_COMP_PARAMS_1_REG_OFFSET);
+	if (data & WDOG_COMP_PARAMS_1_USE_FIX_TOP) {
+		tops = dw_wdt_fix_tops;
+	} else {
+		ret = of_property_read_variable_u32_array(dev_of_node(dev),
+			"snps,watchdog-tops", of_tops, DW_WDT_NUM_TOPS,
+			DW_WDT_NUM_TOPS);
+		if (ret < 0) {
+			dev_warn(dev, "No valid TOPs array specified\n");
+			tops = dw_wdt_fix_tops;
+		} else {
+			tops = of_tops;
+		}
+	}
+
+	/* Convert the specified TOPs into an array of watchdog timeouts. */
+	dw_wdt_handle_tops(dw_wdt, tops);
+	if (!dw_wdt->timeouts[DW_WDT_NUM_TOPS - 1].sec) {
+		dev_err(dev, "No any valid TOP detected\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#define DW_WDT_DBGFS_REG(_name, _off) \
+{				      \
+	.name = _name,		      \
+	.offset = _off		      \
+}
+
+static const struct debugfs_reg32 dw_wdt_dbgfs_regs[] = {
+	DW_WDT_DBGFS_REG("cr", WDOG_CONTROL_REG_OFFSET),
+	DW_WDT_DBGFS_REG("torr", WDOG_TIMEOUT_RANGE_REG_OFFSET),
+	DW_WDT_DBGFS_REG("ccvr", WDOG_CURRENT_COUNT_REG_OFFSET),
+	DW_WDT_DBGFS_REG("crr", WDOG_COUNTER_RESTART_REG_OFFSET),
+	DW_WDT_DBGFS_REG("stat", WDOG_INTERRUPT_STATUS_REG_OFFSET),
+	DW_WDT_DBGFS_REG("param5", WDOG_COMP_PARAMS_5_REG_OFFSET),
+	DW_WDT_DBGFS_REG("param4", WDOG_COMP_PARAMS_4_REG_OFFSET),
+	DW_WDT_DBGFS_REG("param3", WDOG_COMP_PARAMS_3_REG_OFFSET),
+	DW_WDT_DBGFS_REG("param2", WDOG_COMP_PARAMS_2_REG_OFFSET),
+	DW_WDT_DBGFS_REG("param1", WDOG_COMP_PARAMS_1_REG_OFFSET),
+	DW_WDT_DBGFS_REG("version", WDOG_COMP_VERSION_REG_OFFSET),
+	DW_WDT_DBGFS_REG("type", WDOG_COMP_TYPE_REG_OFFSET)
+};
+
+static void dw_wdt_dbgfs_init(struct dw_wdt *dw_wdt)
+{
+	struct device *dev = dw_wdt->wdd.parent;
+	struct debugfs_regset32 *regset;
+
+	regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL);
+	if (!regset)
+		return;
+
+	regset->regs = dw_wdt_dbgfs_regs;
+	regset->nregs = ARRAY_SIZE(dw_wdt_dbgfs_regs);
+	regset->base = dw_wdt->regs;
+
+	dw_wdt->dbgfs_dir = debugfs_create_dir(dev_name(dev), NULL);
+
+	debugfs_create_regset32("registers", 0444, dw_wdt->dbgfs_dir, regset);
+}
+
+static void dw_wdt_dbgfs_clear(struct dw_wdt *dw_wdt)
+{
+	debugfs_remove_recursive(dw_wdt->dbgfs_dir);
+}
+
+#else /* !CONFIG_DEBUG_FS */
+
+static void dw_wdt_dbgfs_init(struct dw_wdt *dw_wdt) {}
+static void dw_wdt_dbgfs_clear(struct dw_wdt *dw_wdt) {}
+
+#endif /* !CONFIG_DEBUG_FS */
+
 static int dw_wdt_drv_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -244,9 +562,18 @@ static int dw_wdt_drv_probe(struct platform_device *pdev)
 	if (IS_ERR(dw_wdt->regs))
 		return PTR_ERR(dw_wdt->regs);
 
-	dw_wdt->clk = devm_clk_get(dev, NULL);
-	if (IS_ERR(dw_wdt->clk))
-		return PTR_ERR(dw_wdt->clk);
+	/*
+	 * Try to request the watchdog dedicated timer clock source. It must
+	 * be supplied if asynchronous mode is enabled. Otherwise fallback
+	 * to the common timer/bus clocks configuration, in which the very
+	 * first found clock supply both timer and APB signals.
+	 */
+	dw_wdt->clk = devm_clk_get(dev, "tclk");
+	if (IS_ERR(dw_wdt->clk)) {
+		dw_wdt->clk = devm_clk_get(dev, NULL);
+		if (IS_ERR(dw_wdt->clk))
+			return PTR_ERR(dw_wdt->clk);
+	}
 
 	ret = clk_prepare_enable(dw_wdt->clk);
 	if (ret)
@@ -258,20 +585,64 @@ static int dw_wdt_drv_probe(struct platform_device *pdev)
 		goto out_disable_clk;
 	}
 
+	/*
+	 * Request APB clock if device is configured with async clocks mode.
+	 * In this case both tclk and pclk clocks are supposed to be specified.
+	 * Alas we can't know for sure whether async mode was really activated,
+	 * so the pclk phandle reference is left optional. If it couldn't be
+	 * found we consider the device configured in synchronous clocks mode.
+	 */
+	dw_wdt->pclk = devm_clk_get_optional(dev, "pclk");
+	if (IS_ERR(dw_wdt->pclk)) {
+		ret = PTR_ERR(dw_wdt->pclk);
+		goto out_disable_clk;
+	}
+
+	ret = clk_prepare_enable(dw_wdt->pclk);
+	if (ret)
+		goto out_disable_clk;
+
 	dw_wdt->rst = devm_reset_control_get_optional_shared(&pdev->dev, NULL);
 	if (IS_ERR(dw_wdt->rst)) {
 		ret = PTR_ERR(dw_wdt->rst);
-		goto out_disable_clk;
+		goto out_disable_pclk;
+	}
+
+	/* Enable normal reset without pre-timeout by default. */
+	dw_wdt_update_mode(dw_wdt, DW_WDT_RMOD_RESET);
+
+	/*
+	 * Pre-timeout IRQ is optional, since some hardware may lack support
+	 * of it. Note we must request rising-edge IRQ, since the lane is left
+	 * pending either until the next watchdog kick event or up to the
+	 * system reset.
+	 */
+	ret = platform_get_irq_optional(pdev, 0);
+	if (ret > 0) {
+		ret = devm_request_irq(dev, ret, dw_wdt_irq,
+				       IRQF_SHARED | IRQF_TRIGGER_RISING,
+				       pdev->name, dw_wdt);
+		if (ret)
+			goto out_disable_pclk;
+
+		dw_wdt->wdd.info = &dw_wdt_pt_ident;
+	} else {
+		if (ret == -EPROBE_DEFER)
+			goto out_disable_pclk;
+
+		dw_wdt->wdd.info = &dw_wdt_ident;
 	}
 
 	reset_control_deassert(dw_wdt->rst);
 
+	ret = dw_wdt_init_timeouts(dw_wdt, dev);
+	if (ret)
+		goto out_disable_clk;
+
 	wdd = &dw_wdt->wdd;
-	wdd->info = &dw_wdt_ident;
 	wdd->ops = &dw_wdt_ops;
-	wdd->min_timeout = 1;
-	wdd->max_hw_heartbeat_ms =
-		dw_wdt_top_in_seconds(dw_wdt, DW_WDT_MAX_TOP) * 1000;
+	wdd->min_timeout = dw_wdt_get_min_timeout(dw_wdt);
+	wdd->max_hw_heartbeat_ms = dw_wdt_get_max_timeout_ms(dw_wdt);
 	wdd->parent = dev;
 
 	watchdog_set_drvdata(wdd, dw_wdt);
@@ -284,7 +655,7 @@ static int dw_wdt_drv_probe(struct platform_device *pdev)
 	 * devicetree.
 	 */
 	if (dw_wdt_is_enabled(dw_wdt)) {
-		wdd->timeout = dw_wdt_get_top(dw_wdt);
+		wdd->timeout = dw_wdt_get_timeout(dw_wdt);
 		set_bit(WDOG_HW_RUNNING, &wdd->status);
 	} else {
 		wdd->timeout = DW_WDT_DEFAULT_SECONDS;
@@ -297,10 +668,15 @@ static int dw_wdt_drv_probe(struct platform_device *pdev)
 
 	ret = watchdog_register_device(wdd);
 	if (ret)
-		goto out_disable_clk;
+		goto out_disable_pclk;
+
+	dw_wdt_dbgfs_init(dw_wdt);
 
 	return 0;
 
+out_disable_pclk:
+	clk_disable_unprepare(dw_wdt->pclk);
+
 out_disable_clk:
 	clk_disable_unprepare(dw_wdt->clk);
 	return ret;
@@ -310,8 +686,11 @@ static int dw_wdt_drv_remove(struct platform_device *pdev)
 {
 	struct dw_wdt *dw_wdt = platform_get_drvdata(pdev);
 
+	dw_wdt_dbgfs_clear(dw_wdt);
+
 	watchdog_unregister_device(&dw_wdt->wdd);
 	reset_control_assert(dw_wdt->rst);
+	clk_disable_unprepare(dw_wdt->pclk);
 	clk_disable_unprepare(dw_wdt->clk);
 
 	return 0;
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index 1847a07842437..1c3a53eb69087 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -22,6 +22,8 @@
 #define MARVELL_PHY_ID_88E3016		0x01410e60
 #define MARVELL_PHY_ID_88X3310		0x002b09a0
 #define MARVELL_PHY_ID_88E2110		0x002b09b0
+#define MARVELL_PHY_ID_88X2222          0x01410f10
+#define MARVELL_PHY_ID_88X2222R         0x014131b0
 
 /* These Ethernet switch families contain embedded PHYs, but they do
  * not have a model ID. So the switch driver traps reads to the ID2
diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h
index 75456a66024a9..a2111938e3adc 100644
--- a/include/linux/pci-ecam.h
+++ b/include/linux/pci-ecam.h
@@ -58,6 +58,8 @@ extern struct pci_ecam_ops pci_thunder_ecam_ops; /* Cavium ThunderX 1.x */
 extern struct pci_ecam_ops xgene_v1_pcie_ecam_ops; /* APM X-Gene PCIe v1 */
 extern struct pci_ecam_ops xgene_v2_pcie_ecam_ops; /* APM X-Gene PCIe v2.x */
 extern struct pci_ecam_ops al_pcie_ops; /* Amazon Annapurna Labs PCIe */
+extern struct pci_ecam_ops baikal_m_pcie_ecam_ops; /* Baikal-M Synopsys DesignWare PCIe */
+extern struct pci_ecam_ops baikal_s_pcie_ecam_ops; /* Baikal-S Synopsys DesignWare PCIe */
 #endif
 
 #ifdef CONFIG_PCI_HOST_COMMON
diff --git a/samples/Kconfig b/samples/Kconfig
index c8dacb4dda80c..77f400b5b96fc 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -169,4 +169,11 @@ config SAMPLE_VFS
 	  as mount API and statx().  Note that this is restricted to the x86
 	  arch whilst it accesses system calls that aren't yet in all arches.
 
+config SAMPLE_BPF
+        bool "Build BPF sample code"
+        depends on HEADERS_INSTALL
+        help
+          Build samples of BPF filters using various methods of
+          BPF filter construction.
+
 endif # SAMPLES
diff --git a/samples/Makefile b/samples/Makefile
index 7d6e4ca28d696..3b33993ac85e7 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -20,3 +20,4 @@ obj-$(CONFIG_SAMPLE_TRACE_PRINTK)	+= trace_printk/
 obj-$(CONFIG_VIDEO_PCI_SKELETON)	+= v4l/
 obj-y					+= vfio-mdev/
 subdir-$(CONFIG_SAMPLE_VFS)		+= vfs
+subdir-$(CONFIG_SAMPLE_BPF)             += bpf
diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c
index 87ba66dcfd478..84a19812f4aea 100644
--- a/sound/hda/hdac_controller.c
+++ b/sound/hda/hdac_controller.c
@@ -10,6 +10,10 @@
 #include <sound/hdaudio.h>
 #include <sound/hda_register.h>
 
+#ifdef CONFIG_OF
+#include <linux/of.h>
+#endif
+
 /* clear CORB read pointer properly */
 static void azx_clear_corbrp(struct hdac_bus *bus)
 {
@@ -77,8 +81,19 @@ void snd_hdac_bus_init_cmd_io(struct hdac_bus *bus)
 	snd_hdac_chip_writew(bus, RIRBWP, AZX_RIRBWP_RST);
 	/* set N=1, get RIRB response interrupt for new entry */
 	snd_hdac_chip_writew(bus, RINTCNT, 1);
-	/* enable rirb dma and response irq */
+
+#ifdef CONFIG_OF
+	if (bus->polling_mode && of_property_read_bool(bus->dev->of_node, 
+					"broken-response-irq"))
+		/* enable rirb dma without response irq */
+		snd_hdac_chip_writeb(bus, RIRBCTL, AZX_RBCTL_DMA_EN);
+	else
+		/* enable rirb dma and response irq */
+		snd_hdac_chip_writeb(bus, RIRBCTL, AZX_RBCTL_DMA_EN | AZX_RBCTL_IRQ_EN);
+#else
 	snd_hdac_chip_writeb(bus, RIRBCTL, AZX_RBCTL_DMA_EN | AZX_RBCTL_IRQ_EN);
+#endif
+
 	/* Accept unsolicited responses */
 	snd_hdac_chip_updatel(bus, GCTL, AZX_GCTL_UNSOL, AZX_GCTL_UNSOL);
 	spin_unlock_irq(&bus->reg_lock);
@@ -145,6 +160,11 @@ int snd_hdac_bus_send_cmd(struct hdac_bus *bus, unsigned int val)
 
 	spin_lock_irq(&bus->reg_lock);
 
+#ifdef CONFIG_OF
+	if (of_property_read_bool(bus->dev->of_node, 
+			"increment-codec-address"))
+		val = val + 0x10000000;
+#endif
 	bus->last_cmd[azx_command_addr(val)] = val;
 
 	/* add command to corb */
@@ -154,6 +174,7 @@ int snd_hdac_bus_send_cmd(struct hdac_bus *bus, unsigned int val)
 		spin_unlock_irq(&bus->reg_lock);
 		return -EIO;
 	}
+
 	wp++;
 	wp %= AZX_MAX_CORB_ENTRIES;
 
diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig
index dae47a45b2b8b..e057cca10034c 100644
--- a/sound/pci/hda/Kconfig
+++ b/sound/pci/hda/Kconfig
@@ -48,6 +48,19 @@ config SND_HDA_TEGRA
 	  To compile this driver as a module, choose M here: the module
 	  will be called snd-hda-tegra.
 
+config SND_HDA_BAIKAL_M
+	tristate "Baikal-M HD Audio"
+	depends on ARCH_BAIKAL
+	select SND_HDA
+	select SND_HDA_ALIGNED_MMIO
+	help
+	  Say Y here to support the HDA controller present in Baikal-M 
+	  SoC
+
+	  This options enables support for the HD Audio controller
+	  present in Baikal-M SoC, used to communicate audio
+	  to the mezzanine board outputs.
+
 if SND_HDA
 
 config SND_HDA_HWDEP
diff --git a/sound/pci/hda/Makefile b/sound/pci/hda/Makefile
index b57432f000568..065238009cfb8 100644
--- a/sound/pci/hda/Makefile
+++ b/sound/pci/hda/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 snd-hda-intel-objs := hda_intel.o
 snd-hda-tegra-objs := hda_tegra.o
+snd-hda-baikal-m-objs := hda_baikal.o
 
 snd-hda-codec-y := hda_bind.o hda_codec.o hda_jack.o hda_auto_parser.o hda_sysfs.o
 snd-hda-codec-y += hda_controller.o
@@ -48,3 +49,4 @@ obj-$(CONFIG_SND_HDA_CODEC_HDMI) += snd-hda-codec-hdmi.o
 # when built in kernel
 obj-$(CONFIG_SND_HDA_INTEL) += snd-hda-intel.o
 obj-$(CONFIG_SND_HDA_TEGRA) += snd-hda-tegra.o
+obj-$(CONFIG_SND_HDA_BAIKAL_M) += snd-hda-baikal-m.o
\ No newline at end of file
diff --git a/sound/pci/hda/hda_baikal.c b/sound/pci/hda/hda_baikal.c
new file mode 100644
index 0000000000000..d024c875028e4
--- /dev/null
+++ b/sound/pci/hda/hda_baikal.c
@@ -0,0 +1,625 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+* Implementation of primary ALSA driver code base for Baikal-M HDA controller.
+*
+* /sound/pci/hda/hda_baikal.c
+*
+* Copyright (C) 2021-2022 Baikal Electronics, JSC
+* Author: Danila Sharikov <Danila.Sharikov@baikalelectronics.com>
+*
+* Based on:
+*	 hda_intel.c, Copyright(c) 2004 Intel Corporation. All rights reserved.
+*        hda_tegra.c
+*/
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/clocksource.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/of_device.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/string.h>
+#include <linux/pm_runtime.h>
+#include <sound/core.h>
+#include <sound/initval.h>
+#include <sound/hda_codec.h>
+
+#include "hda_controller.h"
+
+#ifdef CONFIG_PM
+static int power_save = CONFIG_SND_HDA_POWER_SAVE_DEFAULT;
+module_param(power_save, bint, 0644);
+MODULE_PARM_DESC(power_save,
+		 "Automatic power-saving timeout (in seconds, 0 = disable).");
+#else
+#define power_save	0
+#endif
+
+/* max number of SDs */
+#define NUM_CAPTURE_SD	4
+#define NUM_PLAYBACK_SD	4
+
+struct hda_baikal {
+	struct azx chip;
+	struct device *dev;
+	void __iomem *regs;
+	struct work_struct probe_work;
+	struct work_struct irq_pending_work;
+	unsigned int irq_pending_warned:1;
+};
+
+static void hda_baikal_probe_work(struct work_struct *work);
+
+static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev);
+static const struct hda_controller_ops hda_baikal_ops;
+
+/* calculate runtime delay from LPIB */
+static int azx_get_delay_from_lpib(struct azx *chip, struct azx_dev *azx_dev,
+				   unsigned int pos)
+{
+	struct snd_pcm_substream *substream = azx_dev->core.substream;
+	int stream = substream->stream;
+	unsigned int lpib_pos = azx_get_pos_lpib(chip, azx_dev);
+	int delay;
+
+	if (stream == SNDRV_PCM_STREAM_PLAYBACK)
+		delay = pos - lpib_pos;
+	else
+		delay = lpib_pos - pos;
+	if (delay < 0) {
+		if (delay >= azx_dev->core.delay_negative_threshold)
+			delay = 0;
+		else
+			delay += azx_dev->core.bufsize;
+	}
+
+	if (delay >= azx_dev->core.period_bytes) {
+		dev_info(chip->card->dev,
+			 "Unstable LPIB (%d >= %d); disabling LPIB delay counting\n",
+			 delay, azx_dev->core.period_bytes);
+		delay = 0;
+		chip->driver_caps &= ~AZX_DCAPS_COUNT_LPIB_DELAY;
+		chip->get_delay[stream] = NULL;
+	}
+
+	return bytes_to_frames(substream->runtime, delay);
+}
+
+/* called from IRQ */
+static int azx_position_check(struct azx *chip, struct azx_dev *azx_dev)
+{
+	struct hda_baikal *hda = container_of(chip, struct hda_baikal, chip);
+	int ok;
+
+	ok = azx_position_ok(chip, azx_dev);
+
+	if (ok == 1) {
+		azx_dev->irq_pending = 0;
+		return ok;
+	} else if (ok == 0) {
+		/* bogus IRQ, process it later */
+		azx_dev->irq_pending = 1;
+		schedule_work(&hda->irq_pending_work);
+	}
+	return 0;
+}
+
+/*
+ * Check whether the current DMA position is acceptable for updating
+ * periods.  Returns non-zero if it's OK.
+ *
+ * Many HD-audio controllers appear pretty inaccurate about
+ * the update-IRQ timing.  The IRQ is issued before actually the
+ * data is processed.  So, we need to process it afterwords in a
+ * workqueue.
+ */
+static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev)
+{
+	struct snd_pcm_substream *substream = azx_dev->core.substream;
+	int stream = substream->stream;
+	u32 wallclk;
+	unsigned int pos;
+
+	wallclk = azx_readl(chip, WALLCLK) - azx_dev->core.start_wallclk;
+	if (wallclk < (azx_dev->core.period_wallclk * 2) / 3)
+		return -1;	/* bogus (too early) interrupt */
+
+	if (chip->get_position[stream])
+		pos = chip->get_position[stream](chip, azx_dev);
+	else { /* use the position buffer as default */
+		pos = azx_get_pos_posbuf(chip, azx_dev);
+		if (!pos || pos == (u32)-1) {
+			dev_info(chip->card->dev,
+				 "Invalid position buffer, using LPIB read method instead.\n");
+			chip->get_position[stream] = azx_get_pos_lpib;
+			if (chip->get_position[0] == azx_get_pos_lpib &&
+			    chip->get_position[1] == azx_get_pos_lpib)
+				azx_bus(chip)->use_posbuf = false;
+			pos = azx_get_pos_lpib(chip, azx_dev);
+			chip->get_delay[stream] = NULL;
+		} else {
+			chip->get_position[stream] = azx_get_pos_posbuf;
+			if (chip->driver_caps & AZX_DCAPS_COUNT_LPIB_DELAY)
+				chip->get_delay[stream] = azx_get_delay_from_lpib;
+		}
+	}
+
+	if (pos >= azx_dev->core.bufsize)
+		pos = 0;
+
+	if (WARN_ONCE(!azx_dev->core.period_bytes,
+		      "hda-baikal: zero azx_dev->period_bytes"))
+		return -1; /* this shouldn't happen! */
+	if (wallclk < (azx_dev->core.period_wallclk * 5) / 4 &&
+	    pos % azx_dev->core.period_bytes > azx_dev->core.period_bytes / 2)
+		/* NG - it's below the first next period boundary */
+		return chip->bdl_pos_adj ? 0 : -1;
+	azx_dev->core.start_wallclk += wallclk;
+	return 1; /* OK, it's fine */
+}
+
+/* The work for pending PCM period updates */
+static void azx_irq_pending_work(struct work_struct *work)
+{
+	struct hda_baikal *hda = container_of(work, struct hda_baikal, irq_pending_work);
+	struct azx *chip = &hda->chip;
+	struct hdac_bus *bus = azx_bus(chip);
+	struct hdac_stream *s;
+	int pending, ok;
+
+	if (!hda->irq_pending_warned) {
+		dev_info(chip->card->dev,
+			 "IRQ timing workaround is activated for card #%d. Suggest a bigger bdl_pos_adj.\n",
+			 chip->card->number);
+		hda->irq_pending_warned = 1;
+	}
+
+	for (;;) {
+		pending = 0;
+		spin_lock_irq(&bus->reg_lock);
+		list_for_each_entry(s, &bus->stream_list, list) {
+			struct azx_dev *azx_dev = stream_to_azx_dev(s);
+			if (!azx_dev->irq_pending ||
+			    !s->substream ||
+			    !s->running)
+				continue;
+			ok = azx_position_ok(chip, azx_dev);
+			if (ok > 0) {
+				azx_dev->irq_pending = 0;
+				spin_unlock(&bus->reg_lock);
+				snd_pcm_period_elapsed(s->substream);
+				spin_lock(&bus->reg_lock);
+			} else if (ok < 0) {
+				pending = 0; /* too early */
+			} else
+				pending++;
+		}
+		spin_unlock_irq(&bus->reg_lock);
+		if (!pending)
+			return;
+		msleep(1);
+	}
+}
+
+/* clear irq_pending flags and assure no on-going workq */
+static void azx_clear_irq_pending(struct azx *chip)
+{
+	struct hdac_bus *bus = azx_bus(chip);
+	struct hdac_stream *s;
+
+	spin_lock_irq(&bus->reg_lock);
+	list_for_each_entry(s, &bus->stream_list, list) {
+		struct azx_dev *azx_dev = stream_to_azx_dev(s);
+		azx_dev->irq_pending = 0;
+	}
+	spin_unlock_irq(&bus->reg_lock);
+}
+
+static int hda_baikal_dev_disconnect(struct snd_device *device)
+{
+	struct azx *chip = device->device_data;
+
+	chip->bus.shutdown = 1;
+	return 0;
+}
+
+static int hda_baikal_dev_free(struct snd_device *device)
+{
+	struct azx *chip = device->device_data;
+	struct hda_baikal *hda = container_of(chip, struct hda_baikal, chip);
+
+	cancel_work_sync(&hda->probe_work);
+	if (azx_bus(chip)->chip_init) {
+		azx_clear_irq_pending(chip);
+		azx_stop_all_streams(chip);
+		azx_stop_chip(chip);
+	}
+
+	azx_free_stream_pages(chip);
+	azx_free_streams(chip);
+	snd_hdac_bus_exit(azx_bus(chip));
+
+	return 0;
+}
+
+static int hda_baikal_init_chip(struct azx *chip, struct platform_device *pdev)
+{
+	struct hda_baikal *hda = container_of(chip, struct hda_baikal, chip);
+	struct hdac_bus *bus = azx_bus(chip);
+	struct device *dev = hda->dev;
+	struct resource *res;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	hda->regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(hda->regs))
+		return PTR_ERR(hda->regs);
+
+	bus->remap_addr = hda->regs;
+	bus->addr = res->start;
+
+	return 0;
+}
+
+static int hda_baikal_first_init(struct azx *chip, struct platform_device *pdev)
+{
+	struct hdac_bus *bus = azx_bus(chip);
+	struct snd_card *card = chip->card;
+	int err;
+	unsigned short gcap;
+	int irq_id = platform_get_irq(pdev, 0);
+	const char *sname, *drv_name = "baikal-hda";
+	struct device_node *np = pdev->dev.of_node;
+
+	err = hda_baikal_init_chip(chip, pdev);
+	if (err)
+		return err;
+
+	err = devm_request_irq(chip->card->dev, irq_id, azx_interrupt,
+			     IRQF_SHARED, KBUILD_MODNAME, chip);
+	if (err) {
+		dev_err(chip->card->dev,
+			"unable to request IRQ %d, disabling device\n",
+			irq_id);
+		return err;
+	}
+	bus->irq = irq_id;
+
+	synchronize_irq(bus->irq);
+
+	gcap = azx_readw(chip, GCAP);
+	dev_dbg(card->dev, "chipset global capabilities = 0x%x\n", gcap);
+
+	/*
+	 * Read number of streams from GCAP register instead of using
+	 * hardcoded value
+	 */
+	chip->capture_streams = (gcap >> 8) & 0x0f;
+	chip->playback_streams = (gcap >> 12) & 0x0f;
+	if (!chip->playback_streams && !chip->capture_streams) {
+		/* gcap didn't give any info, switching to old method */
+		chip->playback_streams = NUM_PLAYBACK_SD;
+		chip->capture_streams = NUM_CAPTURE_SD;
+	}
+	chip->capture_index_offset = 0;
+	chip->playback_index_offset = chip->capture_streams;
+	chip->num_streams = chip->playback_streams + chip->capture_streams;
+
+	/* initialize streams */
+	err = azx_init_streams(chip);
+	if (err < 0) {
+		dev_err(card->dev, "failed to initialize streams: %d\n", err);
+		return err;
+	}
+
+	err = azx_alloc_stream_pages(chip);
+	if (err < 0) {
+		dev_err(card->dev, "failed to allocate stream pages: %d\n",
+			err);
+		return err;
+	}
+
+	/* initialize chip */
+	azx_init_chip(chip, 1);
+
+	/* codec detection */
+	if (!bus->codec_mask) {
+		dev_err(card->dev, "no codecs found!\n");
+		return -ENODEV;
+	}
+
+	/* driver name */
+	strncpy(card->driver, drv_name, sizeof(card->driver));
+	/* shortname for card */
+	sname = of_get_property(np, "baikal,model", NULL);
+	if (!sname)
+		sname = drv_name;
+	if (strlen(sname) > sizeof(card->shortname))
+		dev_info(card->dev, "truncating shortname for card\n");
+	strncpy(card->shortname, sname, sizeof(card->shortname));
+
+	/* longname for card */
+	snprintf(card->longname, sizeof(card->longname),
+		 "%s at 0x%lx irq %i",
+		 card->shortname, bus->addr, bus->irq);
+
+	return 0;
+}
+
+static int hda_baikal_create(struct snd_card *card,
+			    unsigned int driver_caps,
+			    struct hda_baikal *hda)
+{
+	static struct snd_device_ops ops = {
+		.dev_disconnect = hda_baikal_dev_disconnect,
+		.dev_free = hda_baikal_dev_free,
+	};
+	struct azx *chip;
+	int err;
+
+	chip = &hda->chip;
+
+	mutex_init(&chip->open_mutex);
+	chip->card = card;
+	chip->ops = &hda_baikal_ops;
+	chip->driver_caps = driver_caps;
+	chip->driver_type = driver_caps & 0xff;
+	chip->dev_index = 0;
+	INIT_LIST_HEAD(&chip->pcm_list);
+	INIT_WORK(&hda->irq_pending_work, azx_irq_pending_work);
+
+	chip->codec_probe_mask = 0x3; /* two codecs */
+
+	chip->single_cmd = false;
+	chip->snoop = true;
+
+	chip->get_position[0] = chip->get_position[1] = azx_get_pos_lpib;
+	chip->get_delay[0] = chip->get_delay[1] = azx_get_delay_from_lpib;
+
+	INIT_WORK(&hda->probe_work, hda_baikal_probe_work);
+
+	err = azx_bus_init(chip, NULL);
+	if (err < 0)
+		return err;
+
+	chip->bus.needs_damn_long_delay = 1;
+	chip->bus.core.aligned_mmio = 1;
+
+	/* force polling mode, because RIRB interrupts don't working */
+	if (of_property_read_bool(hda->dev->of_node, "force-polling-mode"))
+		chip->bus.core.polling_mode = 1;
+	else
+		chip->bus.core.polling_mode = 0;
+
+	err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
+	if (err < 0) {
+		dev_err(card->dev, "Error creating device\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static int hda_baikal_probe(struct platform_device *pdev)
+{
+	const unsigned int driver_flags = AZX_DCAPS_PM_RUNTIME |
+					  AZX_DCAPS_NO_64BIT |
+					  AZX_DCAPS_4K_BDLE_BOUNDARY |
+					  AZX_DCAPS_COUNT_LPIB_DELAY;
+	struct snd_card *card;
+	struct azx *chip;
+	struct hda_baikal *hda;
+	int err;
+
+	hda = devm_kzalloc(&pdev->dev, sizeof(*hda), GFP_KERNEL);
+	if (!hda)
+		return -ENOMEM;
+	hda->dev = &pdev->dev;
+	chip = &hda->chip;
+
+	err = snd_card_new(&pdev->dev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1,
+			   THIS_MODULE, 0, &card);
+	if (err < 0) {
+		dev_err(&pdev->dev, "Error creating card!\n");
+		return err;
+	}
+
+	err = hda_baikal_create(card, driver_flags, hda);
+	if (err < 0)
+		goto out_free;
+	card->private_data = chip;
+
+	dev_set_drvdata(&pdev->dev, card);
+
+	pm_runtime_enable(hda->dev);
+	if (!azx_has_pm_runtime(chip))
+		pm_runtime_forbid(hda->dev);
+
+	schedule_work(&hda->probe_work);
+
+	return 0;
+
+out_free:
+	snd_card_free(card);
+	return err;
+}
+
+/* Probe the given codec address */
+static int baikal_probe_codec(struct azx *chip, int addr)
+{
+	unsigned int cmd = (addr << 28) | (AC_NODE_ROOT << 20) |
+		(AC_VERB_PARAMETERS << 8) | AC_PAR_VENDOR_ID;
+	struct hdac_bus *bus = azx_bus(chip);
+	int err;
+	unsigned int res = -1;
+
+	mutex_lock(&bus->cmd_mutex);
+	chip->probing = 1;
+	bus->ops->command(bus, cmd);
+	err = bus->ops->get_response(bus, addr, &res);
+	chip->probing = 0;
+	mutex_unlock(&bus->cmd_mutex);
+	if (err < 0 || res == -1)
+		return -EIO;
+	dev_dbg(chip->card->dev, "codec #%d probed OK\n", addr);
+	return 0;
+}
+
+/* Probe codecs */
+static int azx_baikal_probe_codecs(struct azx *chip, unsigned int max_slots)
+{
+	struct hdac_bus *bus = azx_bus(chip);
+	int c, codecs, err;
+
+	int probe_retry;
+
+	codecs = 0;
+	if (!max_slots)
+		max_slots = AZX_DEFAULT_CODECS;
+
+	for (c = 0; c < max_slots; c++) {
+		if ((bus->codec_mask & (1 << c)) & chip->codec_probe_mask) {
+			for (probe_retry = 0; probe_retry < 100; probe_retry++) {
+				if (baikal_probe_codec(chip, c) < 0) {
+					azx_stop_chip(chip);
+					azx_init_chip(chip, true);
+					continue;
+				} else {
+					dev_warn(chip->card->dev,
+						"Codec #%d probe success; retry count = %d\n",
+						c, probe_retry);
+					break;
+				}
+				bus->codec_mask &= ~(1 << c);
+				dev_warn(chip->card->dev,
+					"Codec #%d probe error; disabling it...\n", c);
+			}
+		}
+	}
+
+	/* Then create codec instances */
+	for (c = 0; c < max_slots; c++) {
+		if ((bus->codec_mask & (1 << c)) & chip->codec_probe_mask) {
+			struct hda_codec *codec;
+			err = snd_hda_codec_new(&chip->bus, chip->card, c, &codec);
+			if (err < 0)
+				continue;
+			codec->jackpoll_interval = chip->jackpoll_interval;
+			codec->beep_mode = chip->beep_mode;
+			codecs++;
+		}
+	}
+	if (!codecs) {
+		dev_err(chip->card->dev, "no codecs initialized\n");
+		return -ENXIO;
+	}
+	return 0;
+}
+
+static void hda_baikal_probe_work(struct work_struct *work)
+{
+	struct hda_baikal *hda = container_of(work, struct hda_baikal,
+					probe_work);
+	struct azx *chip = &hda->chip;
+	struct hdac_bus *bus = azx_bus(chip);
+	struct platform_device *pdev = to_platform_device(hda->dev);
+	int max_slots;
+	int err;
+
+	pm_runtime_get_sync(hda->dev);
+	err = hda_baikal_first_init(chip, pdev);
+	if (err < 0)
+		goto out_free;
+
+	switch (bus->codec_mask) {
+	case 0x1:
+		max_slots = 1;
+		break;
+	case 0x2:
+		max_slots = 2;
+		break;
+	case 0x3:
+		max_slots = 2;
+		break;
+	}
+
+	/* create codec instances */
+	if (of_property_read_bool(bus->dev->of_node, "cyclic-codec-probe"))
+		err = azx_baikal_probe_codecs(chip, max_slots);
+	else
+		err = azx_probe_codecs(chip, max_slots);
+
+	if (err < 0)
+		goto out_free;
+
+	err = azx_codec_configure(chip);
+	if (err < 0)
+		goto out_free;
+
+	err = snd_card_register(chip->card);
+	if (err < 0)
+		goto out_free;
+
+	chip->running = 1;
+
+	snd_hda_set_power_save(&chip->bus, power_save * 1000);
+
+ out_free:
+	pm_runtime_put(hda->dev);
+	return; /* no error return from async probe */
+}
+
+static int hda_baikal_remove(struct platform_device *pdev)
+{
+	int ret;
+
+	ret = snd_card_free(dev_get_drvdata(&pdev->dev));
+	pm_runtime_disable(&pdev->dev);
+
+	return ret;
+}
+
+static void hda_baikal_shutdown(struct platform_device *pdev)
+{
+	struct snd_card *card = dev_get_drvdata(&pdev->dev);
+	struct azx *chip;
+
+	if (!card)
+		return;
+	chip = card->private_data;
+	if (chip && chip->running)
+		azx_stop_chip(chip);
+}
+
+static const struct hda_controller_ops hda_baikal_ops = {
+	.position_check = azx_position_check,
+};
+
+static const struct of_device_id hda_baikal_match[] = {
+	{ .compatible = "baikal,bm1000-hda" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, hda_baikal_match);
+
+static struct platform_driver baikal_platform_hda = {
+	.driver = {
+		.name = "baikal-hda",
+		.of_match_table = hda_baikal_match,
+	},
+	.probe = hda_baikal_probe,
+	.remove = hda_baikal_remove,
+	.shutdown = hda_baikal_shutdown,
+};
+module_platform_driver(baikal_platform_hda);
+
+MODULE_DESCRIPTION("Baikal HDA bus driver");
+MODULE_LICENSE("GPL");
diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c
index 6dff68691dfff..e37d5833abad5 100644
--- a/sound/pci/hda/hda_controller.c
+++ b/sound/pci/hda/hda_controller.c
@@ -1270,21 +1270,21 @@ int azx_probe_codecs(struct azx *chip, unsigned int max_slots)
 	for (c = 0; c < max_slots; c++) {
 		if ((bus->codec_mask & (1 << c)) & chip->codec_probe_mask) {
 			if (probe_codec(chip, c) < 0) {
-				/* Some BIOSen give you wrong codec addresses
-				 * that don't exist
-				 */
-				dev_warn(chip->card->dev,
-					 "Codec #%d probe error; disabling it...\n", c);
-				bus->codec_mask &= ~(1 << c);
-				/* More badly, accessing to a non-existing
-				 * codec often screws up the controller chip,
-				 * and disturbs the further communications.
-				 * Thus if an error occurs during probing,
-				 * better to reset the controller chip to
-				 * get back to the sanity state.
-				 */
-				azx_stop_chip(chip);
-				azx_init_chip(chip, true);
+					/* Some BIOSen give you wrong codec addresses
+					 * that don't exist
+					 */
+					dev_warn(chip->card->dev,
+						"Codec #%d probe error; disabling it...\n", c);
+					bus->codec_mask &= ~(1 << c);
+					/* More badly, accessing to a non-existing
+					 * codec often screws up the controller chip,
+					 * and disturbs the further communications.
+					 * Thus if an error occurs during probing,
+					 * better to reset the controller chip to
+					 * get back to the sanity state.
+					 */
+					azx_stop_chip(chip);
+					azx_init_chip(chip, true);
 			}
 		}
 	}
diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c
index 424faafcb85b9..64328506c0f14 100644
--- a/sound/soc/codecs/tlv320aic3x.c
+++ b/sound/soc/codecs/tlv320aic3x.c
@@ -1853,6 +1853,9 @@ static int aic3x_i2c_probe(struct i2c_client *i2c,
 		if (ret != 0)
 			goto err;
 		gpio_direction_output(aic3x->gpio_reset, 0);
+
+		udelay(100);
+		gpio_set_value(aic3x->gpio_reset, 1);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(aic3x->supplies); i++)
diff --git a/sound/soc/dwc/Makefile b/sound/soc/dwc/Makefile
index 91e1aaab99669..91b8fed03a1fb 100644
--- a/sound/soc/dwc/Makefile
+++ b/sound/soc/dwc/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 # SYNOPSYS Platform Support
 obj-$(CONFIG_SND_DESIGNWARE_I2S) += designware_i2s.o
+obj-$(CONFIG_ARCH_BAIKAL) += baikal_snd.o
 
 designware_i2s-y := dwc-i2s.o
 designware_i2s-$(CONFIG_SND_DESIGNWARE_PCM) += dwc-pcm.o
diff --git a/sound/soc/dwc/baikal_snd.c b/sound/soc/dwc/baikal_snd.c
new file mode 100644
index 0000000000000..bb5a0d3e48172
--- /dev/null
+++ b/sound/soc/dwc/baikal_snd.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019-2022 Baikal Electronics, JSC
+ * Author: Ekaterina Skachko <Ekaterina.Skachko@baikalelectronics.ru>
+ * Author: Danila Sharikov <Danila.Sharikov@baikalelectronics.ru>
+ */
+
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <sound/soc.h>
+#include <sound/initval.h>
+
+#include "local.h"
+
+static struct snd_soc_dai_link snd_baikal_dai = {
+	.dai_fmt = SND_SOC_DAIFMT_I2S	|
+		   SND_SOC_DAIFMT_NB_NF	|
+		   SND_SOC_DAIFMT_CBM_CFM,
+};
+
+static struct snd_soc_card snd_baikal_card = {
+	.name		= "snd-baikal-card",
+	.owner		= THIS_MODULE,
+	.dai_link	= &snd_baikal_dai,
+	.num_links	= 1,
+};
+
+static int snd_soc_baikal_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct device_node *codec_np, *baikal_np;
+	struct snd_soc_dai_link_component *comp;
+	int err;
+
+	comp = devm_kzalloc(&pdev->dev, 3 * sizeof(*comp), GFP_KERNEL);
+	if (!comp)
+		return -ENOMEM;
+
+	baikal_np = of_parse_phandle(np, "baikal,cpu-dai", 0);
+	if (!baikal_np) {
+		dev_err(&pdev->dev,
+			"'baikal,cpu-dai': missing or invalid!\n");
+		return -EINVAL;
+	}
+
+	codec_np = of_parse_phandle(np, "baikal,audio-codec", 0);
+	if (!codec_np) {
+		dev_err(&pdev->dev,
+			"'baikal,audio-codec': missing or invalid!\n");
+		return -EINVAL;
+	}
+
+	snd_baikal_card.dev = &pdev->dev;
+	snd_baikal_dai.cpus = &comp[0];
+	snd_baikal_dai.codecs = &comp[1];
+	snd_baikal_dai.platforms = &comp[2];
+
+	err = of_property_read_string(np, "baikal,dai-name",
+		&snd_baikal_dai.name);
+	if (err) {
+		dev_err(&pdev->dev,
+			"'baikal,dai-name': missing or invalid!\n");
+		return -EINVAL;
+	}
+
+	err = of_property_read_string(np, "baikal,stream-name",
+		&snd_baikal_dai.stream_name);
+	if (err) {
+		dev_err(&pdev->dev,
+			"'baikal,stream-name': missing or invalid!\n");
+		return -EINVAL;
+	}
+
+	err = of_property_read_string(np, "baikal,codec-name",
+		&snd_baikal_dai.codecs->dai_name);
+	if (err) {
+		dev_err(&pdev->dev,
+			"'baikal,codec-name': missing or invalid!\n");
+		return -EINVAL;
+	}
+
+	snd_baikal_dai.num_cpus = 1;
+	snd_baikal_dai.num_codecs = 1;
+	snd_baikal_dai.num_platforms = 1;
+
+	snd_baikal_dai.cpus->of_node = baikal_np;
+	snd_baikal_dai.codecs->of_node = codec_np;
+	snd_baikal_dai.platforms->of_node = baikal_np;
+
+	snd_soc_of_parse_card_name(&snd_baikal_card, "baikal,card-name");
+
+	platform_set_drvdata(pdev, &snd_baikal_card);
+	snd_soc_card_set_drvdata(&snd_baikal_card, NULL);
+
+	err = snd_soc_register_card(&snd_baikal_card);
+	if (err)
+		dev_err(&pdev->dev,
+			"snd_soc_register_card failed (%d)!\n", err);
+
+	return err;
+}
+
+static int snd_soc_baikal_remove(struct platform_device *pdev)
+{
+	struct snd_soc_card *snd_baikal_card = platform_get_drvdata(pdev);
+
+	snd_soc_unregister_card(snd_baikal_card);
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id snd_soc_baikal_of_match[] = {
+	{ .compatible = "baikal,snd-soc-baikal", },
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, snd_soc_baikal_of_match);
+#endif
+
+
+static struct platform_driver snd_soc_baikal_driver = {
+	.probe		= snd_soc_baikal_probe,
+	.remove		= snd_soc_baikal_remove,
+	.driver		= {
+		.name	= "snd-soc-baikal",
+		.of_match_table = of_match_ptr(snd_soc_baikal_of_match),
+	},
+};
+
+module_platform_driver(snd_soc_baikal_driver);
+
+MODULE_AUTHOR("Danila Sharikov <Danila.Sharikov@baikalelectronics.ru>");
+MODULE_DESCRIPTION("Baikal-M SoC Sound Driver");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/dwc/dwc-i2s.c b/sound/soc/dwc/dwc-i2s.c
index 65112b9d8588a..f77845f5f8de5 100644
--- a/sound/soc/dwc/dwc-i2s.c
+++ b/sound/soc/dwc/dwc-i2s.c
@@ -136,11 +136,12 @@ static irqreturn_t i2s_irq_handler(int irq, void *dev_id)
 			irq_valid = true;
 		}
 
-		/* Error Handling: TX */
+		/* Error Handling: RX
 		if (isr[i] & ISR_RXFO) {
 			dev_err(dev->dev, "RX overrun (ch_id=%d)\n", i);
 			irq_valid = true;
 		}
+		*/
 	}
 
 	if (irq_valid)
@@ -189,6 +190,9 @@ static int dw_i2s_startup(struct snd_pcm_substream *substream,
 	struct dw_i2s_dev *dev = snd_soc_dai_get_drvdata(cpu_dai);
 	union dw_i2s_snd_dma_data *dma_data = NULL;
 
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_dai *codec_dai = rtd->codec_dai;
+
 	if (!(dev->capability & DWC_I2S_RECORD) &&
 			(substream->stream == SNDRV_PCM_STREAM_CAPTURE))
 		return -EINVAL;
@@ -203,6 +207,7 @@ static int dw_i2s_startup(struct snd_pcm_substream *substream,
 		dma_data = &dev->capture_dma_data;
 
 	snd_soc_dai_set_dma_data(cpu_dai, substream, (void *)dma_data);
+	snd_soc_dai_set_sysclk(codec_dai, 0, 12000000, SND_SOC_CLOCK_IN);
 
 	return 0;
 }
@@ -611,12 +616,13 @@ static int dw_configure_dai_by_dt(struct dw_i2s_dev *dev,
 
 }
 
+
 static int dw_i2s_probe(struct platform_device *pdev)
 {
 	const struct i2s_platform_data *pdata = pdev->dev.platform_data;
 	struct dw_i2s_dev *dev;
 	struct resource *res;
-	int ret, irq;
+	int ret, irq, irq_num;
 	struct snd_soc_dai_driver *dw_i2s_dai;
 	const char *clk_id;
 
@@ -639,15 +645,27 @@ static int dw_i2s_probe(struct platform_device *pdev)
 
 	dev->dev = &pdev->dev;
 
-	irq = platform_get_irq(pdev, 0);
-	if (irq >= 0) {
-		ret = devm_request_irq(&pdev->dev, irq, i2s_irq_handler, 0,
-				pdev->name, dev);
-		if (ret < 0) {
-			dev_err(&pdev->dev, "failed to request irq\n");
-			return ret;
+	irq_num = platform_irq_count(pdev);
+	if (irq_num < 0)
+		return irq_num; /* -EPROBE_DEFER */
+
+	if (!irq_num)
+		dev_err(&pdev->dev, "No irq found on device\n");
+
+	irq_num--;
+	do {
+		irq = platform_get_irq(pdev, irq_num);
+		if (irq >= 0) {
+			printk(KERN_INFO "%s Registered IRQ number %d %d\n",
+				__func__, irq, irq_num);
+			ret = devm_request_irq(&pdev->dev, irq, i2s_irq_handler,
+				0, pdev->name, dev);
+			if (ret < 0) {
+				dev_err(&pdev->dev, "failed to request irq\n");
+				return ret;
+			}
 		}
-	}
+	} while (irq_num--);
 
 	dev->i2s_reg_comp1 = I2S_COMP_PARAM_1;
 	dev->i2s_reg_comp2 = I2S_COMP_PARAM_2;
@@ -711,8 +729,10 @@ static int dw_i2s_probe(struct platform_device *pdev)
 	}
 
 	pm_runtime_enable(&pdev->dev);
+	
 	return 0;
 
+
 err_clk_disable:
 	if (dev->capability & DW_I2S_MASTER)
 		clk_disable_unprepare(dev->clk);
diff --git a/tools/build/Makefile.include b/tools/build/Makefile.include
index 8dadaa0fbb439..fa9a2ff4b409a 100644
--- a/tools/build/Makefile.include
+++ b/tools/build/Makefile.include
@@ -1,8 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0-only
 build := -f $(srctree)/tools/build/Makefile.build dir=. obj
 
+ifdef CROSS_COMPILE
+fixdep:
+else
 fixdep:
 	$(Q)$(MAKE) -C $(srctree)/tools/build CFLAGS= LDFLAGS= $(OUTPUT)fixdep
+endif
 
 fixdep-clean:
 	$(Q)$(MAKE) -C $(srctree)/tools/build clean