vm_dep.h revision b52a336e0ccd9ef29cd11bb3c7e530788483ee04
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * CDDL HEADER START
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * The contents of this file are subject to the terms of the
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * Common Development and Distribution License (the "License").
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * You may not use this file except in compliance with the License.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * See the License for the specific language governing permissions
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * and limitations under the License.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * When distributing Covered Code, include this CDDL HEADER in each
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * If applicable, add the following below this CDDL HEADER, with the
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * fields enclosed by brackets "[]" replaced with your own identifying
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * information: Portions Copyright [yyyy] [name of copyright owner]
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * CDDL HEADER END
e2cf88ac9d753a00c17aa235f6afdc76574fe3a6Quaker Fang * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * Copyright (c) 2010, Intel Corporation.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * All rights reserved.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * UNIX machine dependent virtual memory support.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxcextern "C" {
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * WARNING: vm_dep.h is included by files in common.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * Do not use this function for obtaining clock tick. This
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * is called by callers who do not need to have a guaranteed
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * correct tick value. The proper routine to use is tsc_read().
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc plcnt_inc_dec(pp, mtype, szc, 1l << PAGE_BSZS_SHIFT(szc), flags)
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc plcnt_inc_dec(pp, mtype, szc, -1l << PAGE_BSZS_SHIFT(szc), flags)
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * macro to update page list max counts. no-op on x86.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * macro to modify the page list max counts when memory is added to
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * the page lists during startup (add_physmem) or during a DR operation
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * when memory is added (kphysm_add_memory_dynamic) or deleted
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * (kphysm_del_cleanup).
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc#define PLCNT_MODIFY_MAX(pfn, cnt) mtype_modify_max(pfn, cnt)
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxcextern int mnode_pgcnt(int);
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxcextern int mnode_range_cnt(int);
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * candidate counters in vm_pagelist.c are indexed by color and range
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc#define MNODE_MAX_MRANGE(mnode) memrange_num(mem_node_config[mnode].physbase)
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * This was really badly defined, it implicitly uses mnode_maxmrange[]
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * which is a static in vm_pagelist.c
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxcextern int mtype_2_mrange(int);
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * Per page size free lists. Allocated dynamically.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * dimensions [mtype][mmu_page_sizes][colors]
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * mtype specifies a physical memory range with a unique mnode.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * For now there is only a single size cache list. Allocated dynamically.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * dimensions [mtype][colors]
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * mtype specifies a physical memory range with a unique mnode.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * There are mutexes for both the page freelist
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * and the page cachelist. We want enough locks to make contention
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * reasonable, but not too many -- otherwise page_freelist_lock() gets
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * so expensive that it becomes the bottleneck!
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxcextern page_t *page_get_mnode_freelist(int, uint_t, int, uchar_t, uint_t);
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxcextern page_t *page_get_mnode_cachelist(uint_t, uint_t, int, int);
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc/* mem node iterator is not used on x86 */
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * interleaved_mnodes mode is never set on x86, therefore,
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * simply return the limits of the given mnode, which then
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * determines the length of hpm_counters array for the mnode.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc#define HPM_COUNTERS_LIMITS(mnode, physbase, physmax, first) \
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * macro to call page_ctrs_adjust() when memory is added
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * during a DR operation.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc _np = MIN(_endpfn, mem_node_config[_mn].physmax + 1) - _pfn; \
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc (hw_page_array[(nszc)].hp_shift - hw_page_array[(szc)].hp_shift)
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc (hw_page_array[szc].hp_shift - hw_page_array[0].hp_shift))
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc (hw_page_array[(szc)].hp_shift - hw_page_array[0].hp_shift)
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * This macro calculates the next sequential pfn with the specified
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * color using color equivalency mask
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc#define PAGE_NEXT_PFN_FOR_COLOR(pfn, szc, color, ceq_mask, color_mask, it) \
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc } else { \
e2cf88ac9d753a00c17aa235f6afdc76574fe3a6Quaker Fang/* get the color equivalency mask for the next szc */
e2cf88ac9d753a00c17aa235f6afdc76574fe3a6Quaker Fang ((mask) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc)))
e2cf88ac9d753a00c17aa235f6afdc76574fe3a6Quaker Fang/* get the color of the next szc */
a399b7655a1d835aa8606c2b29e4e777baac8635zf ((color) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc)))
/*
 * Find the bin for the given page if it was of size szc.
 *
 * The bin is obtained by handing the page's p_pagenum and szc to
 * PFN_2_COLOR(); NULL is passed as PFN_2_COLOR()'s third argument.
 *
 * pp is parenthesized before the member access so that any pointer-valued
 * expression (e.g. "pp + 1" or "cond ? a : b") can be passed, not only a
 * plain identifier.
 */
#define PP_2_BIN_SZC(pp, szc) (PFN_2_COLOR((pp)->p_pagenum, szc, NULL))
/*
 * Round pfnum down to a multiple of SZCPAGES(szc) by clearing the low-order
 * bits selected by (SZCPAGES(szc) - 1).  This assumes SZCPAGES(szc) is a
 * power of two.
 *
 * pfnum is parenthesized because '&' binds tighter than '|', '^' and '?:';
 * without the parentheses an argument such as "a | b" would have only its
 * right-hand operand masked.
 */
#define PFN_BASE(pfnum, szc) ((pfnum) & ~(SZCPAGES(szc) - 1))
a399b7655a1d835aa8606c2b29e4e777baac8635zf * this structure is used for walking free page lists
a399b7655a1d835aa8606c2b29e4e777baac8635zf * controls when to split large pages into smaller pages,
a399b7655a1d835aa8606c2b29e4e777baac8635zf * and when to coalesce smaller pages into larger pages
a399b7655a1d835aa8606c2b29e4e777baac8635zftypedef struct page_list_walker {
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc uint_t plw_bin_split_prev; /* last bin we tried to split */
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc uint_t plw_ceq_dif; /* number of different color groups */
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc /* to check */
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc uint_t plw_ceq_mask[MMU_PAGE_SIZES + 1]; /* color equiv mask */
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxcvoid page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin,
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxcextern int mtype_init(vnode_t *, caddr_t, uint_t *, size_t);
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * macros to loop through the mtype range (page_get_mnode_{free,cache,any}list,
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * and page_get_contig_pages)
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * MTYPE_START sets the initial mtype. -1 if the mtype range specified does
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * not contain mnode.
e2cf88ac9d753a00c17aa235f6afdc76574fe3a6Quaker Fang * MTYPE_NEXT sets the next mtype. -1 if there are no more valid
e2cf88ac9d753a00c17aa235f6afdc76574fe3a6Quaker Fang * mtype in the range.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc } else { \
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc#define PC_BIN_MUTEX(mnode, bin, flags) ((flags & PG_FREE_LIST) ? \
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc/* Return the leader for this mapping size */
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc/* Return the root page for this page based on p_szc */
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * The counter base must be per page_counter element to prevent
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * races when re-indexing, and the base page size element should
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * be aligned on a boundary of the given region size.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * We also round up the number of pages spanned by the counters
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * for a given region to PC_BASE_ALIGN in certain situations to simplify
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * the coding for some non-performance critical routines.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc#define PC_BASE_ALIGN ((pfn_t)1 << PAGE_BSZS_SHIFT(MMU_PAGE_SIZES-1))
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * cpu/mmu-dependent vm variables
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * page sizes that legacy applications can see via getpagesizes(3c).
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * Used to prevent legacy applications from inadvertently using the
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * 'new' large pagesizes (1g and above).
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc/* For x86, userszc is the same as the kernel's szc */
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * for hw_page_map_t, sized to hold the ratio of large page to base
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * pagesize (1024 max)
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxctypedef short hpmctr_t;
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * get the setsize of the current cpu - assume homogeneous for x86
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc (l2cache_assoc ? (l2cache_sz / l2cache_assoc) : MMU_PAGESIZE)
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * Return the log2(pagesize(szc) / MMU_PAGESIZE) --- or the shift count
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * for the number of base pages in this pagesize
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc#define PAGE_BSZS_SHIFT(szc) (LEVEL_SHIFT(szc) - MMU_PAGESHIFT)
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * Internal PG_ flags.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc#define PGI_RELOCONLY 0x010000 /* opposite of PG_NORELOC */
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc#define PGI_PGCPHIPRI 0x040000 /* page_get_contig_page pri alloc */
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc#define PGI_PGCPSZC0 0x080000 /* relocate base pagesize page */
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * PGI range flags - should not overlap PGI flags
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc#define PGI_MT_RANGE16M 0x2000000 /* mtype range to 16m */
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc#define PGI_MT_RANGE (PGI_MT_RANGE0 | PGI_MT_RANGE16M | PGI_MT_RANGE4G)
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * Maximum and default values for user heap, stack, private and shared
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * anonymous memory, and user text and initialized data.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * Used by map_pgsz*() routines.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * Sanity control. Don't use large pages regardless of user
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * settings if there's less than priv or shm_lpg_min_physmem memory installed.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * The units for this variable are 8K pages.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * Hash the address space pointer (as) and the address (addr) to get a bin.
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc bin = (((((uintptr_t)(addr) >> PAGESHIFT) + ((uintptr_t)(as) >> 4)) \
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc (hw_page_array[szc].hp_shift - hw_page_array[0].hp_shift))
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * cpu private vm data - accessed thru CPU->cpu_vm_data
0ba2cbe97e0678a691742f98d2532caed0a2c4aaxc * vc_pnum_memseg: tracks last memseg visited in page_numtopp_nolock()
void *vc_kmptr;
#define VM_CPU_DATA_PADSIZE \
extern char vm_cpu_data0[];
#ifdef VM_STATS
struct vmm_vmstats_str {
pfn_t);
extern void page_freelist_coalesce_all(int);
#ifdef __cplusplus