From e27598227a12485c787a57581b1797531941bf51 Mon Sep 17 00:00:00 2001 Message-Id: In-Reply-To: References: From: Akinobu Mita Date: Wed, 4 Jun 2014 16:06:50 -0700 Subject: [PATCH 11/26] x86: enable DMA CMA with swiotlb commit 9c5a3621427da68afe6a078cadf807d2c8cc1d12 upstream. Ported-by: Nam Ninh The DMA Contiguous Memory Allocator support on x86 is disabled when swiotlb config option is enabled. So DMA CMA is always disabled on x86_64 because swiotlb is always enabled. This attempts to support for DMA CMA with enabling swiotlb config option. The contiguous memory allocator on x86 is integrated in the function dma_generic_alloc_coherent() which is .alloc callback in nommu_dma_ops for dma_alloc_coherent(). x86_swiotlb_alloc_coherent() which is .alloc callback in swiotlb_dma_ops tries to allocate with dma_generic_alloc_coherent() firstly and then swiotlb_alloc_coherent() is called as a fallback. The main part of supporting DMA CMA with swiotlb is that changing x86_swiotlb_free_coherent() which is .free callback in swiotlb_dma_ops for dma_free_coherent() so that it can distinguish memory allocated by dma_generic_alloc_coherent() from one allocated by swiotlb_alloc_coherent() and release it with dma_generic_free_coherent() which can handle contiguous memory. This change requires making is_swiotlb_buffer() global function. This also needs to change .free callback in the dma_map_ops for amd_gart and sta2x11, because these dma_ops are also using dma_generic_alloc_coherent(). Signed-off-by: Akinobu Mita Acked-by: Marek Szyprowski Acked-by: Konrad Rzeszutek Wilk Cc: David Woodhouse Cc: Don Dutile Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Andi Kleen Cc: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Jim Somerville --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/swiotlb.h | 7 +++++++ arch/x86/kernel/amd_gart_64.c | 2 +- arch/x86/kernel/pci-swiotlb.c | 9 ++++++--- arch/x86/pci/sta2x11-fixup.c | 6 ++---- 5 files changed, 17 insertions(+), 9 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 48ae099..9e841a5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -47,7 +47,7 @@ config X86 select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_FRAME_POINTERS - select HAVE_DMA_CONTIGUOUS if !SWIOTLB + select HAVE_DMA_CONTIGUOUS select HAVE_KRETPROBES select HAVE_OPTPROBES select HAVE_KPROBES_ON_FTRACE diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index 977f176..ab05d73 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -29,4 +29,11 @@ static inline void pci_swiotlb_late_init(void) static inline void dma_mark_clean(void *addr, size_t size) {} +extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs); +extern void x86_swiotlb_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_addr, + struct dma_attrs *attrs); + #endif /* _ASM_X86_SWIOTLB_H */ diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index b574b29..8e3842f 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -512,7 +512,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, struct dma_attrs *attrs) { gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL); - free_pages((unsigned long)vaddr, get_order(size)); + dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs); } static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 4853440..284d506 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -16,7 +16,7 @@ int swiotlb __read_mostly; -static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, +void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags, struct dma_attrs *attrs) { @@ -30,11 +30,14 @@ static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); } -static void x86_swiotlb_free_coherent(struct device *dev, size_t size, +void x86_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, struct dma_attrs *attrs) { - swiotlb_free_coherent(dev, size, vaddr, dma_addr); + if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr))) + swiotlb_free_coherent(dev, size, vaddr, dma_addr); + else + dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs); } static struct dma_map_ops swiotlb_dma_ops = { diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index 9d8a509..5ceda85 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -173,9 +173,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev, { void *vaddr; - vaddr = dma_generic_alloc_coherent(dev, size, dma_handle, flags, attrs); - if (!vaddr) - vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, flags); + vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs); *dma_handle = p2a(*dma_handle, to_pci_dev(dev)); return vaddr; } @@ -183,7 +181,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev, /* We have our own dma_ops: the same as swiotlb but from alloc (above) */ static struct dma_map_ops sta2x11_dma_ops = { .alloc = sta2x11_swiotlb_alloc_coherent, - .free = swiotlb_free_coherent, + .free = x86_swiotlb_free_coherent, .map_page = swiotlb_map_page, .unmap_page = swiotlb_unmap_page, .map_sg = swiotlb_map_sg_attrs, -- 1.8.3.1