video: tegra: nvmap: Use MSR instruction for cache flush

Doing an entire cache flush by set/ways on Denver using can
take considerably long time because of too many context switches.
On the other hand, on Denver, MSR instruction provides a way to
flush the entire cache and avoid the context switch overhead.
This definitely helps use cases which need to flush the entire cache
frequently.

Bug 200035864

Bug 18775332

Change-Id: I60486d0dedfd337e4c586d9e7118cb26b16cc1c6
Signed-off-by: Sri Krishna chowdary <schowdary@nvidia.com>
Signed-off-by: Xia Yang <xiay@nvidia.com>
diff --git a/drivers/video/tegra/nvmap/nvmap_mm.c b/drivers/video/tegra/nvmap/nvmap_mm.c
index 942394c..24f69f4 100644
--- a/drivers/video/tegra/nvmap/nvmap_mm.c
+++ b/drivers/video/tegra/nvmap/nvmap_mm.c
@@ -24,12 +24,28 @@
 
 #include "nvmap_priv.h"
 
+inline static void nvmap_flush_dcache_all(void *dummy)
+{
+#if defined(CONFIG_DENVER_CPU)
+	u64 id_afr0;
+	asm volatile ("mrs %0, ID_AFR0_EL1" : "=r"(id_afr0));
+	if (likely((id_afr0 & 0xf00) == 0x100)) {
+		asm volatile ("msr s3_0_c15_c13_0, %0" : : "r" (0));
+		asm volatile ("dsb sy");
+	} else {
+		__flush_dcache_all(NULL);
+	}
+#else
+	__flush_dcache_all(NULL);
+#endif
+}
+
 void inner_flush_cache_all(void)
 {
 #if defined(CONFIG_ARM64) && defined(CONFIG_NVMAP_CACHE_MAINT_BY_SET_WAYS_ON_ONE_CPU)
-	__flush_dcache_all(NULL);
+	nvmap_flush_dcache_all(NULL);
 #elif defined(CONFIG_ARM64)
-	on_each_cpu(__flush_dcache_all, NULL, 1);
+	on_each_cpu(nvmap_flush_dcache_all, NULL, 1);
 #elif defined(CONFIG_NVMAP_CACHE_MAINT_BY_SET_WAYS_ON_ONE_CPU)
 	v7_flush_kern_cache_all();
 #else