日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 运维知识 > linux >内容正文

linux

Linux内核最新的连续内存分配器(CMA)——避免预留大块内存【转】

發布時間:2025/3/18 linux 41 豆豆
生活随笔 收集整理的這篇文章主要介紹了 Linux内核最新的连续内存分配器(CMA)——避免预留大块内存【转】 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

轉自:https://blog.csdn.net/21cnbao/article/details/7309757

在我們使用ARM等嵌入式Linux系統的時候,一個頭疼的問題是GPU,Camera,HDMI等都需要預留大量連續內存,這部分內存平時不用,但是一般的做法又必須先預留著。目前,Marek Szyprowski和Michal Nazarewicz實現了一套全新的Contiguous Memory Allocator。通過這套機制,我們可以做到不預留內存,這些內存平時是可用的,只有當需要的時候才被分配給Camera,HDMI等設備。下面分析它的基本代碼流程。

聲明連續內存

內核啟動過程中arch/arm/mm/init.c中的arm_memblock_init()會調用dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit));

該函數位于:drivers/base/dma-contiguous.c

  • /**
  • * dma_contiguous_reserve() - reserve area for contiguous memory handling
  • * @limit: End address of the reserved memory (optional, 0 for any).
  • *
  • * This function reserves memory from early allocator. It should be
  • * called by arch specific code once the early allocator (memblock or bootmem)
  • * has been activated and all other subsystems have already allocated/reserved
  • * memory.
  • */
  • void __init dma_contiguous_reserve(phys_addr_t limit)
  • {
  • unsigned long selected_size = 0;
  • pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit);
  • if (size_cmdline != -1) {
  • selected_size = size_cmdline;
  • } else {
  • #ifdef CONFIG_CMA_SIZE_SEL_MBYTES
  • selected_size = size_bytes;
  • #elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE)
  • selected_size = cma_early_percent_memory();
  • #elif defined(CONFIG_CMA_SIZE_SEL_MIN)
  • selected_size = min(size_bytes, cma_early_percent_memory());
  • #elif defined(CONFIG_CMA_SIZE_SEL_MAX)
  • selected_size = max(size_bytes, cma_early_percent_memory());
  • #endif
  • }
  • if (selected_size) {
  • pr_debug("%s: reserving %ld MiB for global area\n", __func__,
  • selected_size / SZ_1M);
  • dma_declare_contiguous(NULL, selected_size, 0, limit);
  • }
  • };
  • 其中的size_bytes定義為:

    static const unsigned long size_bytes = CMA_SIZE_MBYTES * SZ_1M; 默認情況下,CMA_SIZE_MBYTES會被定義為16MB,來源于CONFIG_CMA_SIZE_MBYTES=16

    ->

  • int __init dma_declare_contiguous(struct device *dev, unsigned long size,
  • phys_addr_t base, phys_addr_t limit)
  • {
  • ...
  • /* Reserve memory */
  • if (base) {
  • if (memblock_is_region_reserved(base, size) ||
  • memblock_reserve(base, size) < 0) {
  • base = -EBUSY;
  • goto err;
  • }
  • } else {
  • /*
  • * Use __memblock_alloc_base() since
  • * memblock_alloc_base() panic()s.
  • */
  • phys_addr_t addr = __memblock_alloc_base(size, alignment, limit);
  • if (!addr) {
  • base = -ENOMEM;
  • goto err;
  • } else if (addr + size > ~(unsigned long)0) {
  • memblock_free(addr, size);
  • base = -EINVAL;
  • goto err;
  • } else {
  • base = addr;
  • }
  • }
  • /*
  • * Each reserved area must be initialised later, when more kernel
  • * subsystems (like slab allocator) are available.
  • */
  • r->start = base;
  • r->size = size;
  • r->dev = dev;
  • cma_reserved_count++;
  • pr_info("CMA: reserved %ld MiB at %08lx\n", size / SZ_1M,
  • (unsigned long)base);
  • /* Architecture specific contiguous memory fixup. */
  • dma_contiguous_early_fixup(base, size);
  • return 0;
  • err:
  • pr_err("CMA: failed to reserve %ld MiB\n", size / SZ_1M);
  • return base;
  • }
  • 由此可見,連續內存區域也是在內核啟動的早期,通過__memblock_alloc_base()拿到的。

    另外:

    drivers/base/dma-contiguous.c里面的core_initcall()會導致cma_init_reserved_areas()被調用:

  • static int __init cma_init_reserved_areas(void)
  • {
  • struct cma_reserved *r = cma_reserved;
  • unsigned i = cma_reserved_count;
  • pr_debug("%s()\n", __func__);
  • for (; i; --i, ++r) {
  • struct cma *cma;
  • cma = cma_create_area(PFN_DOWN(r->start),
  • r->size >> PAGE_SHIFT);
  • if (!IS_ERR(cma))
  • dev_set_cma_area(r->dev, cma);
  • }
  • return 0;
  • }
  • core_initcall(cma_init_reserved_areas);

  • cma_create_area()會調用cma_activate_area(),cma_activate_area()函數則會針對保留區域中的每個pageblock(而不是每個單獨的page)調用:

    init_cma_reserved_pageblock(pfn_to_page(base_pfn));

    這個函數則會通過set_pageblock_migratetype(page, MIGRATE_CMA)將頁設置為MIGRATE_CMA類型的:

  • #ifdef CONFIG_CMA
  • /* Free whole pageblock and set it's migration type to MIGRATE_CMA. */
  • void __init init_cma_reserved_pageblock(struct page *page)
  • {
  • unsigned i = pageblock_nr_pages;
  • struct page *p = page;
  • do {
  • __ClearPageReserved(p);
  • set_page_count(p, 0);
  • } while (++p, --i);
  • set_page_refcounted(page);
  • set_pageblock_migratetype(page, MIGRATE_CMA);
  • __free_pages(page, pageblock_order);
  • totalram_pages += pageblock_nr_pages;
  • }
  • #endif
  • 同時其中調用的__free_pages(page, pageblock_order);最終會調用到__free_one_page(page, zone, order, migratetype);
    相關的page會被加到MIGRATE_CMA的free_list上面去:

    list_add(&page->lru, &zone->free_area[order].free_list[migratetype]);

    申請連續內存

    申請連續內存仍然使用標準的arch/arm/mm/dma-mapping.c中定義的dma_alloc_coherent()和dma_alloc_writecombine(),這二者會間接調用drivers/base/dma-contiguous.c中的

  • struct page *dma_alloc_from_contiguous(struct device *dev, int count,
  • unsigned int align)

    ->

  • struct page *dma_alloc_from_contiguous(struct device *dev, int count,
  • unsigned int align)
  • {
  • ...
  • for (;;) {
  • pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
  • start, count, mask);
  • if (pageno >= cma->count) {
  • ret = -ENOMEM;
  • goto error;
  • }
  • pfn = cma->base_pfn + pageno;
  • ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
  • if (ret == 0) {
  • bitmap_set(cma->bitmap, pageno, count);
  • break;
  • } else if (ret != -EBUSY) {
  • goto error;
  • }
  • pr_debug("%s(): memory range at %p is busy, retrying\n",
  • __func__, pfn_to_page(pfn));
  • /* try again with a bit different memory target */
  • start = pageno + mask + 1;
  • }
  • ...
  • }
  • ->

    int alloc_contig_range(unsigned long start, unsigned long end,

                           unsigned migratetype)

    需要隔離page,隔離page的作用通過代碼的注釋可以體現:

  • /*
  • * What we do here is we mark all pageblocks in range as
  • * MIGRATE_ISOLATE. Because of the way page allocator work, we
  • * align the range to MAX_ORDER pages so that page allocator
  • * won't try to merge buddies from different pageblocks and
  • * change MIGRATE_ISOLATE to some other migration type.
  • *
  • * Once the pageblocks are marked as MIGRATE_ISOLATE, we
  • * migrate the pages from an unaligned range (ie. pages that
  • * we are interested in). This will put all the pages in
  • * range back to page allocator as MIGRATE_ISOLATE.
  • *
  • * When this is done, we take the pages in range from page
  • * allocator removing them from the buddy system. This way
  • * page allocator will never consider using them.
  • *
  • * This lets us mark the pageblocks back as
  • * MIGRATE_CMA/MIGRATE_MOVABLE so that free pages in the
  • * MAX_ORDER aligned range but not in the unaligned, original
  • * range are put back to page allocator so that buddy can use
  • * them.
  • */
  • ret = start_isolate_page_range(pfn_align_to_maxpage_down(start),
  • pfn_align_to_maxpage_up(end),
  • migratetype);

  • 簡單地說,就是把相關的page標記為MIGRATE_ISOLATE,這樣buddy系統就不會再使用他們。

  • /*
  • * start_isolate_page_range() -- make page-allocation-type of range of pages
  • * to be MIGRATE_ISOLATE.
  • * @start_pfn: The lower PFN of the range to be isolated.
  • * @end_pfn: The upper PFN of the range to be isolated.
  • * @migratetype: migrate type to set in error recovery.
  • *
  • * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
  • * the range will never be allocated. Any free pages and pages freed in the
  • * future will not be allocated again.
  • *
  • * start_pfn/end_pfn must be aligned to pageblock_order.
  • * Returns 0 on success and -EBUSY if any part of range cannot be isolated.
  • */
  • int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
  • unsigned migratetype)
  • {
  • unsigned long pfn;
  • unsigned long undo_pfn;
  • struct page *page;
  • BUG_ON((start_pfn) & (pageblock_nr_pages - 1));
  • BUG_ON((end_pfn) & (pageblock_nr_pages - 1));
  • for (pfn = start_pfn;
  • pfn < end_pfn;
  • pfn += pageblock_nr_pages) {
  • page = __first_valid_page(pfn, pageblock_nr_pages);
  • if (page && set_migratetype_isolate(page)) {
  • undo_pfn = pfn;
  • goto undo;
  • }
  • }
  • return 0;
  • undo:
  • for (pfn = start_pfn;
  • pfn < undo_pfn;
  • pfn += pageblock_nr_pages)
  • unset_migratetype_isolate(pfn_to_page(pfn), migratetype);
  • return -EBUSY;
  • }
    接下來調用__alloc_contig_migrate_range()進行頁面隔離和遷移:

  • static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
  • {
  • /* This function is based on compact_zone() from compaction.c. */
  • unsigned long pfn = start;
  • unsigned int tries = 0;
  • int ret = 0;
  • struct compact_control cc = {
  • .nr_migratepages = 0,
  • .order = -1,
  • .zone = page_zone(pfn_to_page(start)),
  • .sync = true,
  • };
  • INIT_LIST_HEAD(&cc.migratepages);
  • migrate_prep_local();
  • while (pfn < end || !list_empty(&cc.migratepages)) {
  • if (fatal_signal_pending(current)) {
  • ret = -EINTR;
  • break;
  • }
  • if (list_empty(&cc.migratepages)) {
  • cc.nr_migratepages = 0;
  • pfn = isolate_migratepages_range(cc.zone, &cc,
  • pfn, end);
  • if (!pfn) {
  • ret = -EINTR;
  • break;
  • }
  • tries = 0;
  • } else if (++tries == 5) {
  • ret = ret < 0 ? ret : -EBUSY;
  • break;
  • }
  • ret = migrate_pages(&cc.migratepages,
  • __alloc_contig_migrate_alloc,
  • 0, false, true);
  • }
  • putback_lru_pages(&cc.migratepages);
  • return ret > 0 ? 0 : ret;
  • }
  • 其中的函數migrate_pages()會完成頁面的遷移,遷移過程中通過傳入的__alloc_contig_migrate_alloc()申請新的page,并將老的page的內容賦給新的page:

  • int migrate_pages(struct list_head *from,
  • new_page_t get_new_page, unsigned long private, bool offlining,
  • bool sync)
  • {
  • int retry = 1;
  • int nr_failed = 0;
  • int pass = 0;
  • struct page *page;
  • struct page *page2;
  • int swapwrite = current->flags & PF_SWAPWRITE;
  • int rc;
  • if (!swapwrite)
  • current->flags |= PF_SWAPWRITE;
  • for(pass = 0; pass < 10 && retry; pass++) {
  • retry = 0;
  • list_for_each_entry_safe(page, page2, from, lru) {
  • cond_resched();
  • rc = unmap_and_move(get_new_page, private,
  • page, pass > 2, offlining,
  • sync);
  • switch(rc) {
  • case -ENOMEM:
  • goto out;
  • case -EAGAIN:
  • retry++;
  • break;
  • case 0:
  • break;
  • default:
  • /* Permanent failure */
  • nr_failed++;
  • break;
  • }
  • }
  • }
  • rc = 0;
  • ...
  • }
  • 其中的unmap_and_move()函數較為關鍵,它定義在mm/migrate.c中

  • /*
  • * Obtain the lock on page, remove all ptes and migrate the page
  • * to the newly allocated page in newpage.
  • */
  • static int unmap_and_move(new_page_t get_new_page, unsigned long private,
  • struct page *page, int force, bool offlining, bool sync)
  • {
  • int rc = 0;
  • int *result = NULL;
  • struct page *newpage = get_new_page(page, private, &result);
  • int remap_swapcache = 1;
  • int charge = 0;
  • struct mem_cgroup *mem = NULL;
  • struct anon_vma *anon_vma = NULL;
  • ...
  • /* charge against new page */
  • charge = mem_cgroup_prepare_migration(page, newpage, &mem);
  • ...
  • if (PageWriteback(page)) {
  • if (!force || !sync)
  • goto uncharge;
  • wait_on_page_writeback(page);
  • }
  • /*
  • * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
  • * we cannot notice that anon_vma is freed while we migrates a page.
  • * This get_anon_vma() delays freeing anon_vma pointer until the end
  • * of migration. File cache pages are no problem because of page_lock()
  • * File Caches may use write_page() or lock_page() in migration, then,
  • * just care Anon page here.
  • */
  • if (PageAnon(page)) {
  • /*
  • * Only page_lock_anon_vma() understands the subtleties of
  • * getting a hold on an anon_vma from outside one of its mms.
  • */
  • anon_vma = page_lock_anon_vma(page);
  • if (anon_vma) {
  • /*
  • * Take a reference count on the anon_vma if the
  • * page is mapped so that it is guaranteed to
  • * exist when the page is remapped later
  • */
  • get_anon_vma(anon_vma);
  • page_unlock_anon_vma(anon_vma);
  • } else if (PageSwapCache(page)) {
  • /*
  • * We cannot be sure that the anon_vma of an unmapped
  • * swapcache page is safe to use because we don't
  • * know in advance if the VMA that this page belonged
  • * to still exists. If the VMA and others sharing the
  • * data have been freed, then the anon_vma could
  • * already be invalid.
  • *
  • * To avoid this possibility, swapcache pages get
  • * migrated but are not remapped when migration
  • * completes
  • */
  • remap_swapcache = 0;
  • } else {
  • goto uncharge;
  • }
  • }
  • ...
  • /* Establish migration ptes or remove ptes */
  • try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
  • skip_unmap:
  • if (!page_mapped(page))
  • rc = move_to_new_page(newpage, page, remap_swapcache);
  • if (rc && remap_swapcache)
  • remove_migration_ptes(page, page);
  • /* Drop an anon_vma reference if we took one */
  • if (anon_vma)
  • drop_anon_vma(anon_vma);
  • uncharge:
  • if (!charge)
  • mem_cgroup_end_migration(mem, page, newpage, rc == 0);
  • unlock:
  • unlock_page(page);
  • move_newpage:
  • ...
  • }
  • 通過unmap_and_move(),老的page就被遷移過去新的page。

    接下來要回收page,回收page的作用是,不至于因為拿了連續的內存后,系統變得內存饑餓:

    ->

  • /*
  • * Reclaim enough pages to make sure that contiguous allocation
  • * will not starve the system.
  • */
  • __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start);

  • ->

  • /*
  • * Trigger memory pressure bump to reclaim some pages in order to be able to
  • * allocate 'count' pages in single page units. Does similar work as
  • *__alloc_pages_slowpath() function.
  • */
  • static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count)
  • {
  • enum zone_type high_zoneidx = gfp_zone(gfp_mask);
  • struct zonelist *zonelist = node_zonelist(0, gfp_mask);
  • int did_some_progress = 0;
  • int order = 1;
  • unsigned long watermark;
  • /*
  • * Increase level of watermarks to force kswapd do his job
  • * to stabilise at new watermark level.
  • */
  • __update_cma_watermarks(zone, count);
  • /* Obey watermarks as if the page was being allocated */
  • watermark = low_wmark_pages(zone) + count;
  • while (!zone_watermark_ok(zone, 0, watermark, 0, 0)) {
  • wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone));
  • did_some_progress = __perform_reclaim(gfp_mask, order, zonelist,
  • NULL);
  • if (!did_some_progress) {
  • /* Exhausted what can be done so it's blamo time */
  • out_of_memory(zonelist, gfp_mask, order, NULL);
  • }
  • }
  • /* Restore original watermark levels. */
  • __update_cma_watermarks(zone, -count);
  • return count;
  • }
    釋放連續內存

    內存釋放的時候也比較簡單,直接就是:

    arch/arm/mm/dma-mapping.c:

    void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle)

    ->

    arch/arm/mm/dma-mapping.c:

  • static void __free_from_contiguous(struct device *dev, struct page *page,
  • size_t size)
  • {
  • __dma_remap(page, size, pgprot_kernel);
  • dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
  • }

  • ->

  • bool dma_release_from_contiguous(struct device *dev, struct page *pages,
  • int count)
  • {
  • ...
  • free_contig_range(pfn, count);
  • ...
  • }

  • ->

  • void free_contig_range(unsigned long pfn, unsigned nr_pages)
  • {
  • for (; nr_pages--; ++pfn)
  • __free_page(pfn_to_page(pfn));
  • }
  • 將page交還給buddy。

    內核內存分配的migratetype

    內核內存分配的時候,帶的標志是GFP_,但是GFP_可以轉化為migratetype:

  • static inline int allocflags_to_migratetype(gfp_t gfp_flags)
  • {
  • WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
  • if (unlikely(page_group_by_mobility_disabled))
  • return MIGRATE_UNMOVABLE;
  • /* Group based on mobility */
  • return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) |
  • ((gfp_flags & __GFP_RECLAIMABLE) != 0);
  • }
  • 之后申請內存的時候,會對比遷移類型匹配的free_list:

  • page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
  • zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
  • preferred_zone, migratetype);
  • 另外,筆者也編寫了一個測試程序,透過它隨時測試CMA的功能:

  • /*
  • * kernel module helper for testing CMA
  • *
  • * Licensed under GPLv2 or later.
  • */
  • #include <linux/module.h>
  • #include <linux/device.h>
  • #include <linux/fs.h>
  • #include <linux/miscdevice.h>
  • #include <linux/dma-mapping.h>
  • #define CMA_NUM 10
  • static struct device *cma_dev;
  • static dma_addr_t dma_phys[CMA_NUM];
  • static void *dma_virt[CMA_NUM];
  • /* any read request will free coherent memory, eg.
  • * cat /dev/cma_test
  • */
  • static ssize_t
  • cma_test_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
  • {
  • int i;
  • for (i = 0; i < CMA_NUM; i++) {
  • if (dma_virt[i]) {
  • dma_free_coherent(cma_dev, (i + 1) * SZ_1M, dma_virt[i], dma_phys[i]);
  • _dev_info(cma_dev, "free virt: %p phys: %p\n", dma_virt[i], (void *)dma_phys[i]);
  • dma_virt[i] = NULL;
  • break;
  • }
  • }
  • return 0;
  • }
  • /*
  • * any write request will alloc coherent memory, eg.
  • * echo 0 > /dev/cma_test
  • */
  • static ssize_t
  • cma_test_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
  • {
  • int i;
  • int ret;
  • for (i = 0; i < CMA_NUM; i++) {
  • if (!dma_virt[i]) {
  • dma_virt[i] = dma_alloc_coherent(cma_dev, (i + 1) * SZ_1M, &dma_phys[i], GFP_KERNEL);
  • if (dma_virt[i]) {
  • void *p;
  • /* touch every page in the allocated memory */
  • for (p = dma_virt[i]; p < dma_virt[i] + (i + 1) * SZ_1M; p += PAGE_SIZE)
  • *(u32 *)p = 0;
  • _dev_info(cma_dev, "alloc virt: %p phys: %p\n", dma_virt[i], (void *)dma_phys[i]);
  • } else {
  • dev_err(cma_dev, "no mem in CMA area\n");
  • ret = -ENOMEM;
  • }
  • break;
  • }
  • }
  • return count;
  • }
  • static const struct file_operations cma_test_fops = {
  • .owner = THIS_MODULE,
  • .read = cma_test_read,
  • .write = cma_test_write,
  • };
  • static struct miscdevice cma_test_misc = {
  • .name = "cma_test",
  • .fops = &cma_test_fops,
  • };
  • static int __init cma_test_init(void)
  • {
  • int ret = 0;
  • ret = misc_register(&cma_test_misc);
  • if (unlikely(ret)) {
  • pr_err("failed to register cma test misc device!\n");
  • return ret;
  • }
  • cma_dev = cma_test_misc.this_device;
  • cma_dev->coherent_dma_mask = ~0;
  • _dev_info(cma_dev, "registered.\n");
  • return ret;
  • }
  • module_init(cma_test_init);
  • static void __exit cma_test_exit(void)
  • {
  • misc_deregister(&cma_test_misc);
  • }
  • module_exit(cma_test_exit);
  • MODULE_LICENSE("GPL");
  • MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>");
  • MODULE_DESCRIPTION("kernel module to help the test of CMA");
  • MODULE_ALIAS("CMA test");

  • 申請內存:

    # echo 0 > /dev/cma_test

    釋放內存:

    # cat /dev/cma_test


    參考鏈接:

    [1] http://www.spinics.net/lists/arm-kernel/msg160854.html

    [2] http://www.spinics.net/lists/arm-kernel/msg162063.html

    [3] http://lwn.net/Articles/447405/

    轉載于:https://www.cnblogs.com/sky-heaven/p/9549482.html

    總結

    以上是生活随笔為你收集整理的Linux内核最新的连续内存分配器(CMA)——避免预留大块内存【转】的全部內容,希望文章能夠幫你解決所遇到的問題。

    如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。