What is the CMA memory-allocation technique?

泰晓科技 (tinylab.org) - Understanding mobile-device memory management in one picture, by Chen Jie

A diagram taken from "Memory Management in Tizen" shows how device memory is typically managed on a phone. The camera, codec and radio are handled by the kernel's V4L2 subsystem, which manages device memory through the VB2 (videobuf2) interface. The GPU is handled by the DRM subsystem, which manages device memory through the GEM interface. Sharing device memory across subsystems, for example displaying a camera frame that travels from V4L2 through the camera app to DRM, relies on the DMA-Buf mechanism. Once device memory can be shared, synchronization problems follow, which is why a synchronization (fence) mechanism was further introduced for DMA-Buf. A phone has no dedicated video memory: all device memory is carved out of system RAM, and the buffers above are therefore usually allocated with CMA. @teawater has contributed many improvements to CMA. Because CMA hands out physically contiguous memory, it comes with significant restrictions; if the hardware is capable enough to provide an IOMMU, that restriction disappears: just as the MMU lets the CPU access physical memory through virtual pages, an IOMMU lets I/O devices do the same.
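As a rough user-space sketch of that sharing path (an illustration under assumptions, not code from the article: the device paths, buffer index and use of libdrm's drmPrimeFDToHandle() are mine, and the V4L2 buffers are assumed to have been set up with VIDIOC_REQBUFS already), a frame exported by V4L2 as a dma-buf fd can be imported into DRM/GEM without copying:

#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>
#include <xf86drm.h>            /* libdrm: drmPrimeFDToHandle() */

int share_frame(void)
{
    int vfd = open("/dev/video0", O_RDWR);    /* assumed camera node */
    int dfd = open("/dev/dri/card0", O_RDWR); /* assumed DRM node */
    struct v4l2_exportbuffer exp;
    uint32_t handle;

    memset(&exp, 0, sizeof(exp));
    exp.type  = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    exp.index = 0;                            /* an already-requested V4L2 buffer */

    /* Ask V4L2 (videobuf2) to export the buffer as a dma-buf file descriptor. */
    if (ioctl(vfd, VIDIOC_EXPBUF, &exp) < 0)
        return -1;

    /* Import the same memory into DRM/GEM through PRIME; no copy happens. */
    if (drmPrimeFDToHandle(dfd, exp.fd, &handle) < 0)
        return -1;

    /* 'handle' can now be attached to a framebuffer for display. */
    return 0;
}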
CMA: mapping contiguous physical memory into user space (Part 1)

In multimedia and image-processing applications, large buffers are needed frequently; hardware codecs in particular require the kernel to allocate large blocks of physically contiguous memory.
The idea here is to map physically contiguous memory allocated in the kernel into user space, process it there, and then queue it back into the driver.
In the kernel config, size the CMA area according to your needs.
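As a rough illustration of the knobs involved (option names as used in 3.x kernels and in the code quoted later on this page; adjust to your tree), the CMA area can be sized at build time or overridden on the kernel command line:

    CONFIG_CMA=y
    CONFIG_CMA_SIZE_MBYTES=16    # default global CMA area; see size_bytes below
    # newer trees split the DMA glue into a separate CONFIG_DMA_CMA option

    # boot-time override, parsed into size_cmdline:
    cma=64M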
Driver side:
1. Register a misc device;
2. Implement the IOCTL-based allocation: use dma_alloc_writecombine to take a buffer out of CMA;
3. Implement mmap: use remap_pfn_range to map the physical memory obtained by the DMA allocation in step 2 into user space.

User side:
1. Open the device node /dev/cma_mem;
2. Set the required size via an ioctl command;
3. Map the buffer with mmap.
Test environment:
Linux-3.9.7
arm-linux-gcc 4.5.1
#include <linux/miscdevice.h>
#include <linux/platform_device.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/debugfs.h>
#include <linux/mempolicy.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <linux/dma-mapping.h>
#include <linux/export.h>
#include "cma_mem.h"

#define DEVICE_NAME "cma_mem"

enum cma_status {
    UNKNOW_STATUS = 0,
    HAVE_ALLOCED  = 1,
    HAVE_MMAPED   = 2,
};

struct cmamem_dev {
    unsigned int count;
    struct miscdevice dev;
    struct mutex cmamem_lock;
    struct list_head info_list;
};

struct current_status {
    int status;
    int id_count;
    dma_addr_t phy_base;
};

static struct current_status cmamem_status;
static struct cmamem_dev cmamem_dev;
static struct cmamem_info cma_info[32];

static long cmamem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
    int ret = 0;
    int size = 0;
    dma_addr_t map_dma;

    switch (cmd) {
    case CMEM_ALLOCATE:
        printk(KERN_ERR "cmamem_ioctl:CMEM_ALLOCATE\n");
        cmamem_status.id_count = cmamem_dev.count++;
        cma_info[cmamem_status.id_count].id = cmamem_status.id_count;
        if ((ret = copy_from_user(&cma_info[cmamem_status.id_count],
                                  (void __user *)arg,
                                  sizeof(struct cmamem_info)))) {
            printk(KERN_ERR "cmamem_ioctl:CMEM_ALLOCATE:copy_from_user error:%d\n", ret);
            ret = -EFAULT;
            break;
        }

        size = cma_info[cmamem_status.id_count].len;
        size = PAGE_ALIGN(size);
        if (size == 0) {
            printk(KERN_ERR "size is 0\n");
            ret = -ENOMEM;
            break;
        }

        printk(KERN_ERR "cmamem_ioctl:CMEM_ALLOCATE:start alloc:%d,size:%d\n",
               cmamem_status.id_count, cma_info[cmamem_status.id_count].len);
        /* Allocate uncached (write-combining) memory out of the CMA area. */
        cma_info[cmamem_status.id_count].mem_base =
            (unsigned int)dma_alloc_writecombine(NULL, size, &map_dma, GFP_KERNEL);
        if (!cma_info[cmamem_status.id_count].mem_base) {
            printk(KERN_ERR "dma alloc fail:%d!\n", __LINE__);
            ret = -ENOMEM;
            break;
        }
        printk(KERN_ERR "map_dma:0x%08x,size:%d\n", map_dma, size);
        cma_info[cmamem_status.id_count].phy_base = map_dma;
        cmamem_status.phy_base = map_dma;

        mutex_lock(&cmamem_dev.cmamem_lock);
        cmamem_status.status = HAVE_ALLOCED;
        mutex_unlock(&cmamem_dev.cmamem_lock);
        break;
    default:
        printk(KERN_INFO "cma mem not support command\n");
        break;
    }

    return ret;
}

static int cmamem_mmap(struct file *filp, struct vm_area_struct *vma)
{
    unsigned long start = vma->vm_start;
    unsigned long size = vma->vm_end - vma->vm_start;
    unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
    unsigned long page, pos;
    //dump_stack();

    if (cmamem_status.status != HAVE_ALLOCED) {
        printk(KERN_ERR "%s, you should allocate memory firstly\n", __func__);
        return -EINVAL;
    }

    printk("start=0x%08x offset=0x%08x\n", (unsigned int)start, (unsigned int)offset);

    pos = (unsigned long)cmamem_status.phy_base + offset;
    page = pos >> PAGE_SHIFT;
    /* Map the physical pages obtained by dma_alloc_writecombine() into user space. */
    if (remap_pfn_range(vma, start, page, size, PAGE_SHARED))
        return -EAGAIN;

    printk("remap_pfn_range %u success\n", (unsigned int)page);
    vma->vm_flags &= ~VM_IO;
    vma->vm_flags |= (VM_DONTEXPAND | VM_DONTDUMP);

    return 0;
}

static struct file_operations dev_fops = {
    .owner          = THIS_MODULE,
    .unlocked_ioctl = cmamem_ioctl,
    .mmap           = cmamem_mmap,
};

static int __init cmamem_init(void)
{
    printk(KERN_ERR "%s\n", __func__);
    mutex_init(&cmamem_dev.cmamem_lock);
    INIT_LIST_HEAD(&cmamem_dev.info_list);
    cmamem_dev.count = 0;
    cmamem_dev.dev.name = DEVICE_NAME;
    cmamem_dev.dev.minor = MISC_DYNAMIC_MINOR;
    cmamem_dev.dev.fops = &dev_fops;

    cmamem_status.status = UNKNOW_STATUS;
    cmamem_status.id_count = -1;
    cmamem_status.phy_base = 0;

    return misc_register(&cmamem_dev.dev);
}

static void __exit cmamem_exit(void)
{
    printk(KERN_ERR "%s\n", __func__);
    misc_deregister(&cmamem_dev.dev);
}

module_init(cmamem_init);
module_exit(cmamem_exit);
MODULE_LICENSE("GPL");
cma_mem.h

#ifndef _CMA_MEM_H_
#define _CMA_MEM_H_

#define CMEM_IOCTL_MAGIC 'm'
#define CMEM_GET_PHYS       _IOW(CMEM_IOCTL_MAGIC, 1, unsigned int)
#define CMEM_MAP            _IOW(CMEM_IOCTL_MAGIC, 2, unsigned int)
#define CMEM_GET_SIZE       _IOW(CMEM_IOCTL_MAGIC, 3, unsigned int)
#define CMEM_UNMAP          _IOW(CMEM_IOCTL_MAGIC, 4, unsigned int)
#define CMEM_ALLOCATE       _IOW(CMEM_IOCTL_MAGIC, 5, unsigned int)
#define CMEM_CONNECT        _IOW(CMEM_IOCTL_MAGIC, 6, unsigned int)
#define CMEM_GET_TOTAL_SIZE _IOW(CMEM_IOCTL_MAGIC, 7, unsigned int)
#define CMEM_CACHE_FLUSH    _IOW(CMEM_IOCTL_MAGIC, 8, unsigned int)

struct cmamem_info {
    int id;
    unsigned int len;
    unsigned int offset;
    unsigned int phy_base;
    unsigned int mem_base;
    /* struct list_head list; */
};

#endif

Makefile

KERN_DIR = /work/kernel/linux-3.9.7

all:
	make -C $(KERN_DIR) M=`pwd` modules

clean:
	make -C $(KERN_DIR) M=`pwd` modules clean
	rm -rf modules.order

obj-m += cma_mem.o
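A typical way to build and load the module (assuming the cross toolchain and KERN_DIR above; the device node name comes from DEVICE_NAME in the driver):

    make
    insmod cma_mem.ko
    # the driver registers a misc device with MISC_DYNAMIC_MINOR; udev normally
    # creates /dev/cma_mem, otherwise look up the minor in /proc/misc and mknod it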
User-space test program
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <time.h>
#include <sys/mman.h>
#include <assert.h>
#include <linux/videodev2.h>
#include <linux/fb.h>
#include <pthread.h>
#include <poll.h>
#include <semaphore.h>

#define CMEM_IOCTL_MAGIC 'm'
#define CMEM_GET_PHYS       _IOW(CMEM_IOCTL_MAGIC, 1, unsigned int)
#define CMEM_MAP            _IOW(CMEM_IOCTL_MAGIC, 2, unsigned int)
#define CMEM_GET_SIZE       _IOW(CMEM_IOCTL_MAGIC, 3, unsigned int)
#define CMEM_UNMAP          _IOW(CMEM_IOCTL_MAGIC, 4, unsigned int)
#define CMEM_ALLOCATE       _IOW(CMEM_IOCTL_MAGIC, 5, unsigned int)
#define CMEM_CONNECT        _IOW(CMEM_IOCTL_MAGIC, 6, unsigned int)
#define CMEM_GET_TOTAL_SIZE _IOW(CMEM_IOCTL_MAGIC, 7, unsigned int)
#define CMEM_CACHE_FLUSH    _IOW(CMEM_IOCTL_MAGIC, 8, unsigned int)

struct cmamem_info {
    int id;
    unsigned long len;
    unsigned long offset;
    unsigned long phy_base;
    unsigned long mem_base;
    /* struct list_head list; */
};

int main(void)
{
    int cmem_fd;
    void *cmem_base;
    unsigned int size;
    unsigned int i;
    struct cmamem_info region;

    /* Open the device; the buffer must be non-cached for the hardware engine. */
    cmem_fd = open("/dev/cma_mem", O_RDWR, 0);
    printf("cmem_fd:%d\n", cmem_fd);
    if (cmem_fd < 0)
        return -1;

    memset(&region, 0x00, sizeof(struct cmamem_info));
    region.len = 800 * 480 * 4;
    /* Ask the driver to allocate the buffer from CMA. */
    if (ioctl(cmem_fd, CMEM_ALLOCATE, &region) < 0) {
        perror("CMEM_ALLOCATE failed\n");
        return -1;
    }

    size = region.len;
    /* Map the allocated physical memory into this process. */
    cmem_base = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, cmem_fd, 0);
    printf("cmem_base:0x%08x,region.len:0x%08x offset:0x%08x\n",
           (unsigned int)cmem_base, (unsigned int)region.len, (unsigned int)region.offset);
    if (cmem_base == MAP_FAILED) {
        cmem_base = 0;
        close(cmem_fd);
        cmem_fd = -1;
        perror("mmap pmem error!\n");
        return -1;
    }

    for (i = 0; i < 10; i++)
        ((unsigned int *)cmem_base)[i] = i;

    printf("cmem_base:0x%08x\n", (unsigned int)cmem_base);
    for (i = 0; i < 10; i++)
        printf("%d\n", ((unsigned int *)cmem_base)[i]);

    close(cmem_fd);
    return 0;
}
We can first check the machine's physical memory; here it is 1.8 GB. Now let us run the program. At first the kernel can simply satisfy the application's requests from free physical memory, but once physical memory is used up it starts to use the so-called swap space. On most versions of UNIX, swap space is a separate area of disk. If you are familiar with Microsoft Windows, you can see the similarity between UNIX swap space and the Windows swap file. Unlike Windows, however, there is no local heap, global heap, or discardable memory segment to worry about in UNIX swap space: the UNIX kernel takes care of all that management for you.

Moving data and program code between physical memory and swap space is entirely the kernel's responsibility, so whenever user code reads or writes memory, the data always appears to have been sitting in physical memory already, even though it was actually allocated or swapped in just before the access. In more technical terms, UNIX implements a demand-paged virtual memory system. All the memory a program sees is virtual: it does not exist at the physical addresses the program uses. UNIX divides memory into pages, usually 4096 bytes each. Every memory access goes through a virtual-to-physical translation; exactly how this is done, and how long it takes, depends on the hardware in use.

If the accessed memory is not resident in physical memory, a page fault occurs and control passes to the UNIX kernel. The kernel checks the address being accessed; if it is a legal address for that program, it decides which physical page to supply. If the data has never been written, a new page is allocated; if the data has been saved out to the swap area on disk, the page containing it is read back into physical memory (possibly after moving an existing page out to disk). The kernel then maps the virtual address to the corresponding physical page and lets the program continue. None of this has to be done by the UNIX application itself; it is all hidden inside the kernel.

Eventually, if the application exhausts both physical memory and swap space, or if the stack exceeds its maximum size, the kernel refuses further memory requests. Given that the supply of memory looks practically unlimited, does that mean checking the return value of malloc is pointless? Absolutely not. A common problem in C programs that use dynamically allocated memory is writing outside an allocated block. When that happens the program does not terminate immediately, but it may well have overwritten data used internally by the malloc library routines. The usual consequence is that later calls to malloc fail, not because no memory is left, but because the memory structures have been corrupted. Such problems are quite hard to track down, and the earlier they are detected in a program, the better the chance of finding the cause.
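A minimal C sketch of that last point (illustrative only): check what malloc() returns, and keep every write inside the block you asked for, because an overrun silently corrupts the allocator's own bookkeeping long before anything visibly fails.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    size_t n = 100;
    char *buf = malloc(n);

    if (buf == NULL) {            /* allocation can and does fail */
        perror("malloc");
        return 1;
    }

    memset(buf, 0, n);            /* stay within [buf, buf + n) */
    snprintf(buf, n, "hello");    /* bounded write, no overrun */
    /* buf[n] = 'x'; */           /* one byte past the end: undefined behaviour,
                                     may silently corrupt malloc's metadata */
    free(buf);
    return 0;
}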
The 4 GB address space is each process's virtual address space; when a program runs, the operating system maps parts of it onto physical memory page by page. All the usual functions operate on virtual memory, that default 4 GB, and this is true even in kernel mode: physical memory is neither visible to nor directly manipulable by the program. A memory-mapped file is similar to virtual memory, except that instead of being backed by the system page file it is backed by an ordinary file on disk. It works by taking some region of the file as the start and end of the mapping: for example, if a file is 100 KB and we map it from beginning to end, we reserve a range of addresses in the virtual address space and commit the disk file into that region as needed, and we can then manipulate the file through that piece of memory. .dll and .exe files are loaded in exactly this way. So no function operates on physical memory directly.
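A small Linux user-space sketch of a memory-mapped file (the file name is a placeholder and error handling is trimmed to the essentials):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>

int main(void)
{
    int fd = open("data.bin", O_RDWR);      /* hypothetical example file */
    struct stat st;

    if (fd < 0 || fstat(fd, &st) < 0)
        return 1;

    /* Map the whole file; pages are faulted in on first access. */
    char *p = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE,
                   MAP_SHARED, fd, 0);
    if (p == MAP_FAILED)
        return 1;

    p[0] = 'A';                             /* writes go back to the file */
    msync(p, st.st_size, MS_SYNC);          /* flush dirty pages to disk */
    munmap(p, st.st_size);
    close(fd);
    return 0;
}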
The Linux kernel's new Contiguous Memory Allocator (CMA): avoiding large up-front reservations
by 宋宝华 (Barry Song)
When we work with ARM and other embedded Linux systems, one headache is that the GPU, camera, HDMI and similar blocks each need a large amount of contiguous memory reserved for them. Most of the time that memory is not used, yet the conventional approach is to reserve it up front anyway. Marek Szyprowski and Michal Nazarewicz have implemented a brand-new Contiguous Memory Allocator. With this mechanism, nothing has to be pre-reserved: the memory stays usable by the rest of the system and is handed to the camera, HDMI and friends only when they actually need it. The basic code flow is analysed below.

Declaring contiguous memory

During boot, arm_memblock_init() in arch/arm/mm/init.c calls dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit));
The function lives in drivers/base/dma-contiguous.c:
/**
 * dma_contiguous_reserve() - reserve area for contiguous memory handling
 * @limit: End address of the reserved memory (optional, 0 for any).
 *
 * This function reserves memory from early allocator. It should be
 * called by arch specific code once the early allocator (memblock or bootmem)
 * has been activated and all other subsystems have already allocated/reserved
 * memory.
 */
void __init dma_contiguous_reserve(phys_addr_t limit)
{
    unsigned long selected_size = 0;

    pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit);

    if (size_cmdline != -1) {
        selected_size = size_cmdline;
    } else {
#ifdef CONFIG_CMA_SIZE_SEL_MBYTES
        selected_size = size_bytes;
#elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE)
        selected_size = cma_early_percent_memory();
#elif defined(CONFIG_CMA_SIZE_SEL_MIN)
        selected_size = min(size_bytes, cma_early_percent_memory());
#elif defined(CONFIG_CMA_SIZE_SEL_MAX)
        selected_size = max(size_bytes, cma_early_percent_memory());
#endif
    }

    if (selected_size) {
        pr_debug("%s: reserving %ld MiB for global area\n", __func__,
                 selected_size / SZ_1M);

        dma_declare_contiguous(NULL, selected_size, 0, limit);
    }
}
Here size_bytes is defined as:

static const unsigned long size_bytes = CMA_SIZE_MBYTES * SZ_1M;

By default CMA_SIZE_MBYTES is 16 MB, coming from CONFIG_CMA_SIZE_MBYTES=16.
int __init dma_declare_contiguous(struct device *dev, unsigned long size,
                                  phys_addr_t base, phys_addr_t limit)
{
    struct cma_reserved *r = &cma_reserved[cma_reserved_count];
    unsigned long alignment;

    /* ... sanity checks and argument alignment elided ... */

    /* Reserve memory */
    if (base) {
        if (memblock_is_region_reserved(base, size) ||
            memblock_reserve(base, size) < 0) {
            base = -EBUSY;
            goto err;
        }
    } else {
        /*
         * Use __memblock_alloc_base() since
         * memblock_alloc_base() panic()s.
         */
        phys_addr_t addr = __memblock_alloc_base(size, alignment, limit);
        if (!addr) {
            base = -ENOMEM;
            goto err;
        } else if (addr + size > ~(unsigned long)0) {
            memblock_free(addr, size);
            base = -EINVAL;
            goto err;
        } else {
            base = addr;
        }
    }

    /*
     * Each reserved area must be initialised later, when more kernel
     * subsystems (like slab allocator) are available.
     */
    r->start = base;
    r->size = size;
    r->dev = dev;
    cma_reserved_count++;
    pr_info("CMA: reserved %ld MiB at %08lx\n", size / SZ_1M,
            (unsigned long)base);

    /* Architecture specific contiguous memory fixup. */
    dma_contiguous_early_fixup(base, size);
    return 0;
err:
    pr_err("CMA: failed to reserve %ld MiB\n", size / SZ_1M);
    return base;
}
So the contiguous region, too, is obtained early during kernel boot, via __memblock_alloc_base().

The core_initcall() in drivers/base/dma-contiguous.c then causes cma_init_reserved_areas() to be called:
static int __init cma_init_reserved_areas(void)
{
    struct cma_reserved *r = cma_reserved;
    unsigned i = cma_reserved_count;

    pr_debug("%s()\n", __func__);

    for (; i; --i, ++r) {
        struct cma *cma;
        cma = cma_create_area(PFN_DOWN(r->start),
                              r->size >> PAGE_SHIFT);
        if (!IS_ERR(cma))
            dev_set_cma_area(r->dev, cma);
    }
    return 0;
}
core_initcall(cma_init_reserved_areas);
cma_create_area() calls cma_activate_area(), which in turn calls, for each pageblock in the area:

init_cma_reserved_pageblock(pfn_to_page(base_pfn));

That function marks the pages as MIGRATE_CMA via set_pageblock_migratetype(page, MIGRATE_CMA):
#ifdef CONFIG_CMA
/* Free whole pageblock and set it's migration type to MIGRATE_CMA. */
void __init init_cma_reserved_pageblock(struct page *page)
{
    unsigned i = pageblock_nr_pages;
    struct page *p = page;

    do {
        __ClearPageReserved(p);
        set_page_count(p, 0);
    } while (++p, --i);

    set_page_refcounted(page);
    set_pageblock_migratetype(page, MIGRATE_CMA);
    __free_pages(page, pageblock_order);
    totalram_pages += pageblock_nr_pages;
}
#endif

The __free_pages(page, pageblock_order) call above eventually reaches __free_one_page(page, zone, order, migratetype), so the pages end up on the MIGRATE_CMA free list:

list_add(&page->lru, &zone->free_area[order].free_list[migratetype]);
Allocating contiguous memory

Allocation still goes through the standard dma_alloc_coherent() and dma_alloc_writecombine() defined in arch/arm/mm/dma-mapping.c; both end up calling the following function in drivers/base/dma-contiguous.c:
struct page *dma_alloc_from_contiguous(struct device *dev, int count,
                                       unsigned int align)
{
    /* ... parameter checks and local bookkeeping elided ... */

    for (;;) {
        pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
                                            start, count, mask);
        if (pageno >= cma->count) {
            ret = -ENOMEM;
            goto error;
        }

        pfn = cma->base_pfn + pageno;
        ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
        if (ret == 0) {
            bitmap_set(cma->bitmap, pageno, count);
            break;
        } else if (ret != -EBUSY) {
            goto error;
        }
        pr_debug("%s(): memory range at %p is busy, retrying\n",
                 __func__, pfn_to_page(pfn));
        /* try again with a bit different memory target */
        start = pageno + mask + 1;
    }
    /* ... */
}
int alloc_contig_range(unsigned long start, unsigned long end,
                       unsigned migratetype)

has to isolate the pages first; the purpose of the isolation is explained by the comment in its code:
    /*
     * What we do here is we mark all pageblocks in range as
     * MIGRATE_ISOLATE.  Because of the way page allocator work, we
     * align the range to MAX_ORDER pages so that page allocator
     * won't try to merge buddies from different pageblocks and
     * change MIGRATE_ISOLATE to some other migration type.
     *
     * Once the pageblocks are marked as MIGRATE_ISOLATE, we
     * migrate the pages from an unaligned range (ie. pages that
     * we are interested in).  This will put all the pages in
     * range back to page allocator as MIGRATE_ISOLATE.
     *
     * When this is done, we take the pages in range from page
     * allocator removing them from the buddy system.  This way
     * page allocator will never consider using them.
     *
     * This lets us mark the pageblocks back as
     * MIGRATE_CMA/MIGRATE_MOVABLE so that free pages in the
     * MAX_ORDER aligned range but not in the unaligned, original
     * range are put back to page allocator so that buddy can use
     * them.
     */
    ret = start_isolate_page_range(pfn_align_to_maxpage_down(start),
                                   pfn_align_to_maxpage_up(end),
                                   migratetype);
Put simply, the relevant pageblocks are marked MIGRATE_ISOLATE so that the buddy allocator will no longer touch them.
/*
 * start_isolate_page_range() -- make page-allocation-type of range of pages
 * to be MIGRATE_ISOLATE.
 * @start_pfn: The lower PFN of the range to be isolated.
 * @end_pfn: The upper PFN of the range to be isolated.
 * @migratetype: migrate type to set in error recovery.
 *
 * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
 * the range will never be allocated. Any free pages and pages freed in the
 * future will not be allocated again.
 *
 * start_pfn/end_pfn must be aligned to pageblock_order.
 * Returns 0 on success and -EBUSY if any part of range cannot be isolated.
 */
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
                             unsigned migratetype)
{
    unsigned long pfn;
    unsigned long undo_pfn;
    struct page *page;

    BUG_ON((start_pfn) & (pageblock_nr_pages - 1));
    BUG_ON((end_pfn) & (pageblock_nr_pages - 1));

    for (pfn = start_pfn;
         pfn < end_pfn;
         pfn += pageblock_nr_pages) {
        page = __first_valid_page(pfn, pageblock_nr_pages);
        if (page && set_migratetype_isolate(page)) {
            undo_pfn = pfn;
            goto undo;
        }
    }
    return 0;
undo:
    for (pfn = start_pfn;
         pfn < undo_pfn;
         pfn += pageblock_nr_pages)
        unset_migratetype_isolate(pfn_to_page(pfn), migratetype);

    return -EBUSY;
}
Next, __alloc_contig_migrate_range() is called to isolate and migrate the pages in the range:
static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
{
    /* This function is based on compact_zone() from compaction.c. */

    unsigned long pfn = start;
    unsigned int tries = 0;
    int ret = 0;

    struct compact_control cc = {
        .nr_migratepages = 0,
        .order = -1,
        .zone = page_zone(pfn_to_page(start)),
        .sync = true,
    };
    INIT_LIST_HEAD(&cc.migratepages);

    migrate_prep_local();

    while (pfn < end || !list_empty(&cc.migratepages)) {
        if (fatal_signal_pending(current)) {
            ret = -EINTR;
            break;
        }

        if (list_empty(&cc.migratepages)) {
            cc.nr_migratepages = 0;
            pfn = isolate_migratepages_range(cc.zone, &cc,
                                             pfn, end);
            if (!pfn) {
                ret = -EINTR;
                break;
            }
            tries = 0;
        } else if (++tries == 5) {
            ret = ret < 0 ? ret : -EBUSY;
            break;
        }

        ret = migrate_pages(&cc.migratepages,
                            __alloc_contig_migrate_alloc,
                            0, false, true);
    }

    putback_lru_pages(&cc.migratepages);
    return ret > 0 ? 0 : ret;
}
Within it, migrate_pages() performs the actual migration; while migrating, the __alloc_contig_migrate_alloc() callback passed in allocates the new pages, and the contents of the old pages are handed over to them:
int migrate_pages(struct list_head *from,
                  new_page_t get_new_page, unsigned long private, bool offlining,
                  bool sync)
{
    int retry = 1;
    int nr_failed = 0;
    int pass = 0;
    struct page *page;
    struct page *page2;
    int swapwrite = current->flags & PF_SWAPWRITE;
    int rc;

    if (!swapwrite)
        current->flags |= PF_SWAPWRITE;

    for (pass = 0; pass < 10 && retry; pass++) {
        retry = 0;

        list_for_each_entry_safe(page, page2, from, lru) {
            cond_resched();

            rc = unmap_and_move(get_new_page, private,
                                page, pass > 2, offlining,
                                sync);

            switch (rc) {
            case -ENOMEM:
                goto out;
            case -EAGAIN:
                retry++;
                break;
            case 0:
                break;
            default:
                /* Permanent failure */
                nr_failed++;
                break;
            }
        }
    }
    /* ... */
}
The key piece here is unmap_and_move(), defined in mm/migrate.c:
/*
 * Obtain the lock on page, remove all ptes and migrate the page
 * to the newly allocated page in newpage.
 */
static int unmap_and_move(new_page_t get_new_page, unsigned long private,
                          struct page *page, int force, bool offlining, bool sync)
{
    int rc = 0;
    int *result = NULL;
    struct page *newpage = get_new_page(page, private, &result);
    int remap_swapcache = 1;
    int charge = 0;
    struct mem_cgroup *mem = NULL;
    struct anon_vma *anon_vma = NULL;

    /* ... page locking elided ... */

    /* charge against new page */
    charge = mem_cgroup_prepare_migration(page, newpage, &mem);

    if (PageWriteback(page)) {
        if (!force || !sync)
            goto uncharge;
        wait_on_page_writeback(page);
    }

    /*
     * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
     * we cannot notice that anon_vma is freed while we migrates a page.
     * This get_anon_vma() delays freeing anon_vma pointer until the end
     * of migration. File cache pages are no problem because of page_lock()
     * File Caches may use write_page() or lock_page() in migration, then,
     * just care Anon page here.
     */
    if (PageAnon(page)) {
        /*
         * Only page_lock_anon_vma() understands the subtleties of
         * getting a hold on an anon_vma from outside one of its mms.
         */
        anon_vma = page_lock_anon_vma(page);
        if (anon_vma) {
            /*
             * Take a reference count on the anon_vma if the
             * page is mapped so that it is guaranteed to
             * exist when the page is remapped later
             */
            get_anon_vma(anon_vma);
            page_unlock_anon_vma(anon_vma);
        } else if (PageSwapCache(page)) {
            /*
             * We cannot be sure that the anon_vma of an unmapped
             * swapcache page is safe to use because we don't
             * know in advance if the VMA that this page belonged
             * to still exists. If the VMA and others sharing the
             * data have been freed, then the anon_vma could
             * already be invalid.
             *
             * To avoid this possibility, swapcache pages get
             * migrated but are not remapped when migration
             * completes
             */
            remap_swapcache = 0;
        }
    }

    /* ... */

    /* Establish migration ptes or remove ptes */
    try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);

skip_unmap:
    if (!page_mapped(page))
        rc = move_to_new_page(newpage, page, remap_swapcache);

    if (rc && remap_swapcache)
        remove_migration_ptes(page, page);

    /* Drop an anon_vma reference if we took one */
    if (anon_vma)
        drop_anon_vma(anon_vma);

uncharge:
    if (!charge)
        mem_cgroup_end_migration(mem, page, newpage, rc == 0);

    unlock_page(page);

move_newpage:
    /* ... put back the new page and return rc ... */
}

Through unmap_and_move(), the old page is migrated into the newly allocated one.
The next step is to reclaim pages, so that taking a chunk of contiguous memory does not leave the system starved of memory:

    /*
     * Reclaim enough pages to make sure that contiguous allocation
     * will not starve the system.
     */
    __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start);
/*
 * Trigger memory pressure bump to reclaim some pages in order to be able to
 * allocate 'count' pages in single page units. Does similar work as
 * __alloc_pages_slowpath() function.
 */
static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count)
{
    enum zone_type high_zoneidx = gfp_zone(gfp_mask);
    struct zonelist *zonelist = node_zonelist(0, gfp_mask);
    int did_some_progress = 0;
    int order = 1;
    unsigned long watermark;

    /*
     * Increase level of watermarks to force kswapd do his job
     * to stabilise at new watermark level.
     */
    __update_cma_watermarks(zone, count);

    /* Obey watermarks as if the page was being allocated */
    watermark = low_wmark_pages(zone) + count;
    while (!zone_watermark_ok(zone, 0, watermark, 0, 0)) {
        wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone));

        did_some_progress = __perform_reclaim(gfp_mask, order, zonelist,
                                              NULL);
        if (!did_some_progress) {
            /* Exhausted what can be done so it's blamo time */
            out_of_memory(zonelist, gfp_mask, order, NULL);
        }
    }

    /* Restore original watermark levels. */
    __update_cma_watermarks(zone, -count);

    return count;
}
Freeing contiguous memory

Freeing is comparatively simple; the call chain is:

arch/arm/mm/dma-mapping.c:
void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) ->

arch/arm/mm/dma-mapping.c:
static void __free_from_contiguous(struct device *dev, struct page *page,
                                   size_t size)
{
    __dma_remap(page, size, pgprot_kernel);
    dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
}

bool dma_release_from_contiguous(struct device *dev, struct page *pages,
                                 int count)
{
    /* ... */
    free_contig_range(pfn, count);
    /* ... */
}

void free_contig_range(unsigned long pfn, unsigned nr_pages)
{
    for (; nr_pages--; ++pfn)
        __free_page(pfn_to_page(pfn));
}

which hands the pages back to the buddy allocator.
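For completeness, a minimal sketch of the allocate/free pairing from a driver's point of view (illustrative only; dev is assumed to be a struct device whose DMA mask is already configured). The dma_free_coherent() call is what eventually walks the chain above and returns the pages to CMA and the buddy allocator:

#include <linux/dma-mapping.h>

/* Illustrative sketch: a driver-owned DMA buffer backed by CMA. */
static void *cpu_addr;
static dma_addr_t dma_handle;
#define BUF_SIZE SZ_1M

static int example_buf_alloc(struct device *dev)
{
    cpu_addr = dma_alloc_coherent(dev, BUF_SIZE, &dma_handle, GFP_KERNEL);
    if (!cpu_addr)
        return -ENOMEM;   /* CMA could not satisfy the request */
    return 0;
}

static void example_buf_free(struct device *dev)
{
    /* Returns the pages to the CMA area (and thus to buddy). */
    dma_free_coherent(dev, BUF_SIZE, cpu_addr, dma_handle);
}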
The migratetype of ordinary kernel allocations

Kernel memory allocations carry GFP_ flags, and the GFP_ flags are converted into a migratetype:

static inline int allocflags_to_migratetype(gfp_t gfp_flags)
{
    WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);

    if (unlikely(page_group_by_mobility_disabled))
        return MIGRATE_UNMOVABLE;

    /* Group based on mobility */
    return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) |
            ((gfp_flags & __GFP_RECLAIMABLE) != 0);
}
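As a quick worked example (assuming the migratetype enum ordering of these kernels, where MIGRATE_UNMOVABLE = 0, MIGRATE_RECLAIMABLE = 1 and MIGRATE_MOVABLE = 2):

    /*
     * allocflags_to_migratetype(GFP_KERNEL)                     -> 0 (MIGRATE_UNMOVABLE)
     * allocflags_to_migratetype(GFP_KERNEL | __GFP_RECLAIMABLE) -> 1 (MIGRATE_RECLAIMABLE)
     * allocflags_to_migratetype(GFP_HIGHUSER_MOVABLE)           -> 2 (MIGRATE_MOVABLE)
     */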
Later, at allocation time, the free list whose migratetype matches is the one that gets searched:
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
preferred_zone, migratetype);
In addition, I have written a small test module so that CMA can be exercised at any time:
/*
 * kernel module helper for testing CMA
 *
 * Licensed under GPLv2 or later.
 */

#include <linux/module.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/dma-mapping.h>

#define CMA_NUM 10
static struct device *cma_dev;
static dma_addr_t dma_phys[CMA_NUM];
static void *dma_virt[CMA_NUM];

/* any read request will free coherent memory, eg.
 * cat /dev/cma_test
 */
static ssize_t
cma_test_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
    int i;

    for (i = 0; i < CMA_NUM; i++) {
        if (dma_virt[i]) {
            dma_free_coherent(cma_dev, (i + 1) * SZ_1M, dma_virt[i], dma_phys[i]);
            _dev_info(cma_dev, "free virt: %p phys: %p\n", dma_virt[i], (void *)dma_phys[i]);
            dma_virt[i] = NULL;
            break;
        }
    }
    return 0;
}

/*
 * any write request will alloc coherent memory, eg.
 * echo 0 > /dev/cma_test
 */
static ssize_t
cma_test_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
    int i;
    int ret = 0;

    for (i = 0; i < CMA_NUM; i++) {
        if (!dma_virt[i]) {
            dma_virt[i] = dma_alloc_coherent(cma_dev, (i + 1) * SZ_1M, &dma_phys[i], GFP_KERNEL);

            if (dma_virt[i]) {
                void *p;
                /* touch every page in the allocated memory */
                for (p = dma_virt[i]; p < dma_virt[i] + (i + 1) * SZ_1M; p += PAGE_SIZE)
                    *(u32 *)p = 0;

                _dev_info(cma_dev, "alloc virt: %p phys: %p\n", dma_virt[i], (void *)dma_phys[i]);
            } else {
                dev_err(cma_dev, "no mem in CMA area\n");
                ret = -ENOMEM;
            }
            break;
        }
    }

    return count;
}

static const struct file_operations cma_test_fops = {
    .owner = THIS_MODULE,
    .read  = cma_test_read,
    .write = cma_test_write,
};

static struct miscdevice cma_test_misc = {
    .name = "cma_test",
    .fops = &cma_test_fops,
};

static int __init cma_test_init(void)
{
    int ret = 0;

    ret = misc_register(&cma_test_misc);
    if (unlikely(ret)) {
        pr_err("failed to register cma test misc device!\n");
        return ret;
    }
    cma_dev = cma_test_misc.this_device;
    cma_dev->coherent_dma_mask = ~0;
    _dev_info(cma_dev, "registered.\n");

    return ret;
}
module_init(cma_test_init);

static void __exit cma_test_exit(void)
{
    misc_deregister(&cma_test_misc);
}
module_exit(cma_test_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Barry Song");
MODULE_DESCRIPTION("kernel module to help the test of CMA");
MODULE_ALIAS("CMA test");
To allocate memory:
# echo 0 > /dev/cma_test

To free memory:
# cat /dev/cma_test
References:
[1] http://www.spinics.net/lists/arm-kernel/msg160854.html
[2] http://www.spinics.net/lists/arm-kernel/msg162063.html
[3] http://lwn.net/Articles/447405/