vmalloc()函数的工作方式类似于kmalloc(),只不过前者分配的内存虚拟地址是连续的,而物理地址则无需连续。这也是用户空间分配函数的工作方式:由malloc()返回的页在进程的虚拟地址空间内是连续的,但是,这并不保证它们在物理RAM中也连续。kmalloc()函数确保页在物理地址上是连续的(虚拟地址自然也是连续的)。vmalloc()函数只确保页在虚拟地址空间内是连续的。它通过分配非连续的物理内存块,再“修正”页表,把内存映射到逻辑地址空间的连续区域中,就能做到这一点。
一般只有硬件设备需要得到物理地址连续的内存。在很多体系结构上,硬件设备存在于内存管理单元以外,它根本不理解什么是虚拟地址。因此,硬件设备用到的任何内存区都必须是物理上连续的块,而不仅仅是虚拟地址连续的块。而仅供软件使用的内存块(例如与进程相关的缓冲区)就可以使用只有虚拟地址连续的内存块。对内核而言,所有内存看起来都是逻辑上连续的。
尽管仅仅在某些情况下才需要物理上连续的内存块,但是,很多内核代码都用kmalloc()来获得内存,而不是vmalloc()。这主要是出于性能的考虑:vmalloc()函数为了把物理上不连续的页转换为虚拟地址空间上连续的页,必须专门建立页表项。通过vmalloc()获得的页必须一个一个地进行映射,这就导致比直接内存映射大得多的TLB抖动。因此,vmalloc()只在不得已的时候才用,一般是为了获得大块内存时,例如,当模块被动态地插入到内核中时,就把模块装载到由vmalloc()分配的内存上。
- 以下代码位于 mm/vmalloc.c 中:
- /**
- * vmalloc - allocate virtually contiguous memory
- * @size: allocation size
- *
- * Allocate enough pages to cover @size from the page level
- * allocator and map them into contiguous kernel virtual space.
- *
- * This is a thin wrapper: it calls __vmalloc() with the page allocator
- * flags GFP_KERNEL | __GFP_HIGHMEM and the protection PAGE_KERNEL, and
- * returns whatever __vmalloc() returns.
- *
- * For tight control over page level allocator and protection flags
- * use __vmalloc() instead.
- */
- void *vmalloc(unsigned long size)
- {
- return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
- }
- void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
- { /* Forwards to __vmalloc_node() with the node argument fixed to -1. */
- return __vmalloc_node(size, gfp_mask, prot, -1);
- }
- /**
- * __vmalloc_node - allocate virtually contiguous memory
- * @size: allocation size
- * @gfp_mask: flags for the page level allocator
- * @prot: protection mask for the allocated pages
- * @node: node to use for allocation or -1
- *
- * Allocate enough pages to cover @size from the page level
- * allocator with @gfp_mask flags. Map them into contiguous
- * kernel virtual space, using a pagetable protection of @prot.
- *
- * Returns NULL if @size is zero after PAGE_ALIGN(), if it covers more
- * pages than num_physpages, or if get_vm_area_node() fails. Otherwise
- * returns the result of __vmalloc_area_node() for the obtained area.
- */
- static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
- int node)
- {
- struct vm_struct *area;
- size = PAGE_ALIGN(size); /* work with a page-aligned size from here on */
- if (!size || (size >> PAGE_SHIFT) > num_physpages) /* nothing to do, or more pages than num_physpages */
- return NULL;
- area = get_vm_area_node(size, VM_ALLOC, node, gfp_mask); /* obtain a vm_struct flagged VM_ALLOC */
- if (!area)
- return NULL;
- return __vmalloc_area_node(area, gfp_mask, prot, node); /* NOTE(review): presumably allocates and maps the pages; helper not shown here */
- }
- struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
- unsigned long start, unsigned long end)
- { /* Same as __get_vm_area_node() with node = -1 and gfp_mask = GFP_KERNEL. */
- return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL);
- }
- /**
- * __get_vm_area_node - reserve a range of kernel virtual address space
- * @size: requested size in bytes
- * @flags: vm area flags, stored in the new area; VM_IOREMAP changes alignment
- * @start: lowest address to consider
- * @end: upper limit used when checking whether the range still fits
- * @node: node passed to kmalloc_node() for the descriptor
- * @gfp_mask: allocation flags for the descriptor (masked with GFP_LEVEL_MASK)
- *
- * Allocates a struct vm_struct, searches vmlist for a gap that can hold
- * @size plus one guard page, and links the new area into the list in
- * front of the entry at which the search stopped.
- *
- * Must not be called from interrupt context (enforced by BUG_ON).
- *
- * Returns the new area, or NULL if the page-aligned @size is zero, the
- * descriptor cannot be allocated, or the search runs out of space.
- */
- static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags,
- unsigned long start, unsigned long end,
- int node, gfp_t gfp_mask)
- {
- struct vm_struct **p, *tmp, *area;
- unsigned long align = 1;
- unsigned long addr;
- BUG_ON(in_interrupt());
- /*
- * For ioremap areas, align to a power of two derived from the size:
- * 1 << fls(size), clamped to [PAGE_SHIFT, IOREMAP_MAX_ORDER] bits.
- */
- if (flags & VM_IOREMAP) {
- int bit = fls(size);
- if (bit > IOREMAP_MAX_ORDER)
- bit = IOREMAP_MAX_ORDER;
- else if (bit < PAGE_SHIFT)
- bit = PAGE_SHIFT;
- align = 1ul << bit;
- }
- addr = ALIGN(start, align);
- size = PAGE_ALIGN(size);
- if (unlikely(!size))
- return NULL;
- /* Allocate the descriptor before taking the lock. */
- area = kmalloc_node(sizeof(*area), gfp_mask & GFP_LEVEL_MASK, node);
- if (unlikely(!area))
- return NULL;
- /*
- * We always allocate a guard page.
- */
- size += PAGE_SIZE;
- write_lock(&vmlist_lock);
- /*
- * Walk the list looking for a hole at or above addr.
- * NOTE(review): the walk presumably relies on vmlist being sorted by
- * address; the insertion at "found" keeps new entries in that order.
- */
- for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) {
- /* Area starts below the candidate: skip it, moving past it if it reaches addr. */
- if ((unsigned long)tmp->addr < addr) {
- if((unsigned long)tmp->addr + tmp->size >= addr)
- addr = ALIGN(tmp->size +
- (unsigned long)tmp->addr, align);
- continue;
- }
- /* addr + size wrapped around the address space. */
- if ((size + addr) < addr)
- goto out;
- /* The candidate range ends before this area begins: it fits. */
- if (size + addr <= (unsigned long)tmp->addr)
- goto found;
- /* Otherwise retry just past this area, unless that exceeds the limit. */
- addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align);
- if (addr > end - size)
- goto out;
- }
- /*
- * NOTE(review): when the list is exhausted, control falls through to
- * "found" without comparing addr against end.
- */
- found:
- /* Link the new area in front of *p and fill in the descriptor. */
- area->next = *p;
- *p = area;
- area->flags = flags;
- area->addr = (void *)addr;
- area->size = size; /* includes the guard page added above */
- area->pages = NULL;
- area->nr_pages = 0;
- area->phys_addr = 0;
- write_unlock(&vmlist_lock);
- return area;
- out:
- write_unlock(&vmlist_lock);
- kfree(area);
- if (printk_ratelimit())
- printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc=<size> to increase size.\n");
- return NULL;
- }
vmalloc()函数可能睡眠,因此不能在中断上下文中调用,也不能在其他不允许阻塞的情况下调用。
要释放通过vmalloc()函数所获得的内存,使用下面的函数:
- /**
- * vfree - release memory allocated by vmalloc()
- * @addr: memory base address
- *
- * Free the virtually contiguous memory area starting at @addr, as
- * obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
- * NULL, no operation is performed.
- *
- * Implemented as __vunmap(@addr, 1).
- *
- * Must not be called in interrupt context.
- */
- void vfree(void *addr)
- {
- BUG_ON(in_interrupt()); /* enforce the "no interrupt context" rule */
- __vunmap(addr, 1); /* 1 is passed as deallocate_pages */
- }
- /**
- * __vunmap - remove a vm area and optionally free its pages
- * @addr: base address of the area; NULL is silently ignored
- * @deallocate_pages: non-zero to also free the pages and the page array
- *
- * Looks up and removes the vm area starting at @addr via remove_vm_area().
- * An address that is not page-aligned, or for which no area exists, is
- * reported with an error message and WARN_ON(1), and nothing is freed.
- * The area descriptor itself is always freed on success.
- */
- void __vunmap(void *addr, int deallocate_pages)
- {
- struct vm_struct *area;
- if (!addr)
- return;
- /* Any low bits set means addr is not page-aligned. */
- if ((PAGE_SIZE-1) & (unsigned long)addr) {
- printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
- WARN_ON(1);
- return;
- }
- area = remove_vm_area(addr);
- if (unlikely(!area)) {
- printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
- addr);
- WARN_ON(1);
- return;
- }
- debug_check_no_locks_freed(addr, area->size);
- if (deallocate_pages) {
- int i;
- /* Release every page recorded in the area. */
- for (i = 0; i < area->nr_pages; i++) {
- BUG_ON(!area->pages[i]);
- __free_page(area->pages[i]);
- }
- /* Free the page array with vfree() if VM_VPAGES is set, otherwise with kfree(). */
- if (area->flags & VM_VPAGES)
- vfree(area->pages);
- else
- kfree(area->pages);
- }
- kfree(area);
- return;
- }
- /**
- * remove_vm_area - find and remove a contiguous kernel virtual area
- * @addr: base address
- *
- * Search for the kernel VM area starting at @addr, and remove it.
- * This function returns the found VM area, but using it is NOT safe
- * on SMP machines, except for its size or flags.
- *
- * Takes vmlist_lock for writing around the call to __remove_vm_area()
- * and returns that function's result.
- */
- struct vm_struct *remove_vm_area(void *addr)
- {
- struct vm_struct *v;
- write_lock(&vmlist_lock);
- v = __remove_vm_area(addr);
- write_unlock(&vmlist_lock);
- return v;
- }
- /*
- * Unlink the vm area whose start address equals addr from vmlist.
- *
- * Walks the list comparing each entry's addr for exact equality. If no
- * entry matches, returns NULL. Otherwise calls unmap_vm_area() on the
- * entry, unlinks it from the list, subtracts PAGE_SIZE from its size
- * (the guard page) and returns it.
- *
- * Caller must hold vmlist_lock
- */
- static struct vm_struct *__remove_vm_area(void *addr)
- {
- struct vm_struct **p, *tmp;
- for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) {
- if (tmp->addr == addr)
- goto found;
- }
- return NULL;
- found:
- unmap_vm_area(tmp);
- *p = tmp->next; /* p points at the link that referenced tmp */
- /*
- * Remove the guard page.
- */
- tmp->size -= PAGE_SIZE;
- return tmp;
- }
vfree()函数也可能睡眠,因此不能在中断上下文中调用。