@@ -216,13 +216,6 @@ static int bcm2712_iommu_init(struct bcm2712_iommu *mmu)
 		return -ENOMEM;
 	}
 	MMU_WR(MMMU_ILLEGAL_ADR_OFFSET, MMMU_ILLEGAL_ADR_ENABLE + u);
-	dma_sync_single_for_device(mmu->dev,
-				   virt_to_phys(mmu->top_table),
-				   PAGE_SIZE, DMA_TO_DEVICE);
-	dma_sync_single_for_device(mmu->dev,
-				   virt_to_phys(mmu->default_page),
-				   PAGE_SIZE, DMA_TO_DEVICE);
-	mmu->dirty_top = false;
 	mmu->nmapped_pages = 0;
 
 	/* Flush (and enable) the shared TLB cache; enable this MMU. */
@@ -267,16 +260,21 @@ static int bcm2712_iommu_map(struct iommu_domain *domain, unsigned long iova,
 	struct bcm2712_iommu *mmu = domain_to_mmu(domain);
 	u32 entry = MMMU_PTE_VALID | (pa >> IOMMU_PAGE_SHIFT);
 	u32 align = (u32)(iova | pa | bytes);
-	unsigned int p;
 	unsigned long flags;
+	unsigned int p, p_last, lxp;
+	bool dirty_top = false;
 
 	/* Reject if not entirely within our aperture (should never happen) */
 	bytes *= count;
 	if (iova < mmu->aperture_base || iova + bytes > mmu->aperture_end) {
 		*mapped = 0;
 		return -EINVAL;
 	}
+
+	/* DMA addresses -> page numbers */
 	iova -= mmu->aperture_base;
+	p = iova >> IOMMU_PAGE_SHIFT;
+	p_last = (iova + bytes - 1) >> IOMMU_PAGE_SHIFT;
 
 	/*
 	 * Check we have allocated the required Level-2 tables (in units of
@@ -285,25 +283,33 @@ static int bcm2712_iommu_map(struct iommu_domain *domain, unsigned long iova,
 	 * A failure here will cause the entire map() call to fail.
 	 */
 	spin_lock_irqsave(&mmu->hw_lock, flags);
-	for (p = iova >> TABLES_LXPAGE_SHIFT;
-	     p <= (iova + bytes - 1) >> TABLES_LXPAGE_SHIFT; p++) {
-		if (!mmu->tables[p]) {
-			unsigned int p1, u;
-
-			u = bcm2712_iommu_getpage(mmu, &mmu->tables[p]);
-			if (!u) {
-				spin_unlock_irqrestore(&mmu->hw_lock, flags);
-				*mapped = 0;
-				return -ENOMEM;
-			}
+	for (lxp = (p >> LX_PAGEWORDS_SHIFT);
+	     lxp <= (p_last >> LX_PAGEWORDS_SHIFT); lxp++) {
+		if (!mmu->tables[lxp]) {
+			unsigned int i, u;
+
+			u = bcm2712_iommu_getpage(mmu, &mmu->tables[lxp]);
+			if (!u)
+				break;
 			u |= MMMU_PTE_VALID;
-			for (p1 = p << (PAGE_SHIFT - IOMMU_PAGE_SHIFT);
-			     p1 < (p + 1) << (PAGE_SHIFT - IOMMU_PAGE_SHIFT); p1++) {
-				mmu->top_table[p1] = u++;
+			if (!dirty_top)
+				dma_sync_single_for_cpu(mmu->dev, virt_to_phys(mmu->top_table),
+							PAGE_SIZE, DMA_TO_DEVICE);
+			for (i = lxp << (PAGE_SHIFT - IOMMU_PAGE_SHIFT);
+			     i < (lxp + 1) << (PAGE_SHIFT - IOMMU_PAGE_SHIFT); i++) {
+				mmu->top_table[i] = u++;
 			}
-			mmu->dirty_top = true;
+			dirty_top = true;
 		}
 	}
+	if (dirty_top)
+		dma_sync_single_for_device(mmu->dev, virt_to_phys(mmu->top_table),
+					   PAGE_SIZE, DMA_TO_DEVICE);
+	if (lxp <= (p_last >> LX_PAGEWORDS_SHIFT)) {
+		spin_unlock_irqrestore(&mmu->hw_lock, flags);
+		*mapped = 0;
+		return -ENOMEM;
+	}
 
 	/* large page and write enable flags */
 	if (!(align & ((1 << IOMMU_HUGEPAGE_SHIFT) - 1)))
@@ -315,14 +321,25 @@ static int bcm2712_iommu_map(struct iommu_domain *domain, unsigned long iova,
 	if (prot & IOMMU_WRITE)
 		entry |= MMMU_PTE_WRITEABLE;
 
-	/* Now fill in the level-2 tables */
-	for (p = iova >> IOMMU_PAGE_SHIFT;
-	     p < (iova + bytes) >> IOMMU_PAGE_SHIFT; p++) {
+	/*
+	 * Again iterate over table-pages and bring them into CPU ownership.
+	 * Fill in the required PT entries, then give them back to the device.
+	 */
+	while (p <= p_last) {
 		u32 *tbl = mmu->tables[p >> LX_PAGEWORDS_SHIFT];
 
-		mmu->nmapped_pages += !tbl[p & LX_PAGEWORDS_MASK];
-		tbl[p & LX_PAGEWORDS_MASK] = entry++;
+		dma_sync_single_for_cpu(mmu->dev, virt_to_phys(tbl),
+					PAGE_SIZE, DMA_TO_DEVICE);
+		while (p <= p_last) {
+			mmu->nmapped_pages += !tbl[p & LX_PAGEWORDS_MASK];
+			tbl[p & LX_PAGEWORDS_MASK] = entry++;
+			if (IS_ALIGNED(++p, (1u << LX_PAGEWORDS_SHIFT)))
+				break;
+		}
+		dma_sync_single_for_device(mmu->dev, virt_to_phys(tbl),
+					   PAGE_SIZE, DMA_TO_DEVICE);
 	}
+
 	spin_unlock_irqrestore(&mmu->hw_lock, flags);
 	*mapped = bytes;
 	return 0;
@@ -334,7 +351,7 @@ static size_t bcm2712_iommu_unmap(struct iommu_domain *domain, unsigned long iov
 {
 	struct bcm2712_iommu *mmu = domain_to_mmu(domain);
 	unsigned long flags;
-	unsigned int p;
+	unsigned int p, p_last;
 
 	/* Reject if not entirely within our aperture (should never happen) */
 	bytes *= count;
@@ -345,18 +362,29 @@ static size_t bcm2712_iommu_unmap(struct iommu_domain *domain, unsigned long iov
 	spin_lock_irqsave(&mmu->hw_lock, flags);
 	iommu_iotlb_gather_add_range(gather, iova, bytes);
 
-	/* DMA address -> address relative to region mapped by tables */
+	/* DMA addresses -> page numbers */
 	iova -= mmu->aperture_base;
+	p = iova >> IOMMU_PAGE_SHIFT;
+	p_last = (iova + bytes - 1) >> IOMMU_PAGE_SHIFT;
 
-	/* Clear table entries, this marks the addresses as illegal */
-	for (p = iova >> IOMMU_PAGE_SHIFT;
-	     p < (iova + bytes) >> IOMMU_PAGE_SHIFT;
-	     p++) {
+	/*
+	 * Iterate over tables in Linux-page units and bring them into CPU ownership.
+	 * Clear the required PT entries, then give them back to the device.
+	 */
+	while (p <= p_last) {
 		u32 *tbl = mmu->tables[p >> LX_PAGEWORDS_SHIFT];
 
-		if (tbl && tbl[p & LX_PAGEWORDS_MASK]) {
-			tbl[p & LX_PAGEWORDS_MASK] = 0;
-			mmu->nmapped_pages--;
+		if (tbl) {
+			dma_sync_single_for_cpu(mmu->dev, virt_to_phys(tbl),
+						PAGE_SIZE, DMA_TO_DEVICE);
+			while (p <= p_last) {
+				mmu->nmapped_pages -= !!tbl[p & LX_PAGEWORDS_MASK];
+				tbl[p & LX_PAGEWORDS_MASK] = 0;
+				if (IS_ALIGNED(++p, (1u << LX_PAGEWORDS_SHIFT)))
+					break;
+			}
+			dma_sync_single_for_device(mmu->dev, virt_to_phys(tbl),
+						   PAGE_SIZE, DMA_TO_DEVICE);
 		}
 	}
 
@@ -379,24 +407,8 @@ static int bcm2712_iommu_sync_range(struct iommu_domain *domain,
 	if (iova_end <= iova)
 		return 0;
 
-	/* Ensure tables are cleaned from CPU cache or write-buffer */
-	spin_lock_irqsave(&mmu->hw_lock, flags);
-	for (i = (iova - mmu->aperture_base) >> TABLES_LXPAGE_SHIFT;
-	     i <= (iova_end - mmu->aperture_base - 1) >> TABLES_LXPAGE_SHIFT; i++) {
-		if (mmu->tables[i]) {
-			dma_sync_single_for_device(mmu->dev,
-						   virt_to_phys(mmu->tables[i]),
-						   PAGE_SIZE, DMA_TO_DEVICE);
-		}
-	}
-	if (mmu->dirty_top) {
-		dma_sync_single_for_device(mmu->dev,
-					   virt_to_phys(mmu->top_table),
-					   PAGE_SIZE, DMA_TO_DEVICE);
-		mmu->dirty_top = false;
-	}
-
 	/* Flush the shared TLB cache */
+	spin_lock_irqsave(&mmu->hw_lock, flags);
 	if (mmu->cache)
 		bcm2712_iommu_cache_flush(mmu->cache);
 
@@ -460,7 +472,7 @@ static void bcm2712_iommu_sync_all(struct iommu_domain *domain)
 
 static phys_addr_t bcm2712_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 {
-	phys_addr_t addr = (phys_addr_t)(-EINVAL);
+	phys_addr_t addr = 0;
 	struct bcm2712_iommu *mmu = domain_to_mmu(domain);
 
 	if (iova >= mmu->aperture_base && iova < mmu->aperture_end) {
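For context, the map() and unmap() paths above follow the usual streaming-DMA ownership discipline around each Linux-page-sized table: dma_sync_single_for_cpu() hands the page back to the CPU before any entries are touched, and dma_sync_single_for_device() cleans CPU caches and write-buffers afterwards so the IOMMU sees the update. Below is a minimal sketch of that pattern only; the update_pte() helper is hypothetical (not part of the driver), and it assumes the table memory is already set up for DMA_TO_DEVICE, as in the driver:

#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/mm.h>

/*
 * Hypothetical helper, for illustration only: write a single 32-bit PTE in a
 * Linux-page-sized table that the IOMMU reads via streaming DMA.
 */
static void update_pte(struct device *dev, u32 *tbl, unsigned int idx, u32 entry)
{
	/* Take the table page back into CPU ownership before modifying it. */
	dma_sync_single_for_cpu(dev, virt_to_phys(tbl), PAGE_SIZE, DMA_TO_DEVICE);
	tbl[idx] = entry;
	/* Clean caches/write-buffers so the device sees the new entry. */
	dma_sync_single_for_device(dev, virt_to_phys(tbl), PAGE_SIZE, DMA_TO_DEVICE);
}

The patch amortizes these syncs by batching updates per Linux page rather than per entry, which is why the map()/unmap() loops break on IS_ALIGNED() table boundaries before re-syncing.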