/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2010, Intel Corporation.
* All rights reserved.
*/
/*
* UNIX machine dependent virtual memory support.
*/
#ifndef _VM_DEP_H
#define _VM_DEP_H
#ifdef __cplusplus
extern "C" {
#endif
/*
* WARNING: vm_dep.h is included by files in common.
*/
/*
 * Do not use this function for obtaining clock tick. This
 * is called by callers who do not need to have a guaranteed
 * correct tick value. The proper routine to use is tsc_read().
 */
extern int kernel_page_update_flags_x86(uint_t *);
extern void plcnt_inc_dec(page_t *, int, int, long, int);
/*
* macro to update page list max counts. no-op on x86.
*/
#define PLCNT_XFER_NORELOC(pp)
/*
* macro to modify the page list max counts when memory is added to
* the page lists during startup (add_physmem) or during a DR operation
* when memory is added (kphysm_add_memory_dynamic) or deleted
* (kphysm_del_cleanup).
*/
extern int memrange_num(pfn_t);
extern int pfn_2_mtype(pfn_t);
extern int mtype_func(int, int, uint_t);
extern void mtype_modify_max(pfn_t, long);
extern int mnode_pgcnt(int);
extern int mnode_range_cnt(int);
/*
* candidate counters in vm_pagelist.c are indexed by color and range
*/
#define MAX_MNODE_MRANGES NUM_MEM_RANGES
/*
 * combined memory ranges from mnode and memranges[] to manage a single
 * mnode/mtype dimension in the page lists.
 */
typedef struct {
int mnr_mnode;
int mnr_memrange; /* index into memranges[] */
int mnr_next; /* next lower PA mnoderange */
int mnr_exists;
/* maintain page list stats */
#ifdef DEBUG
	struct mnr_mts {	/* mnode/mtype szc stats */
		int mnr_mts_colors;
	} *mnr_mts;
#endif
} mnoderange_t;
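/*
 * Illustrative sketch (not original code): vm_pagelist.c keeps a table of
 * these structures; counting how many ranges belong to one mnode, as below,
 * is essentially what mnode_range_cnt() is expected to do. The table pointer
 * and count are passed in because the real globals are not part of this
 * fragment.
 */
static int
mnode_range_cnt_sketch(int mnode, mnoderange_t *ranges, int nranges)
{
	int i, cnt = 0;

	for (i = 0; i < nranges; i++) {
		/* an mnoderange pairs one mnode with one memrange index */
		if (ranges[i].mnr_exists && ranges[i].mnr_mnode == mnode)
			cnt++;
	}
	return (cnt);
}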
#define MEMRANGEHI(mtype) \
	((mtype > 0) ? memranges[mtype - 1] - 1 : physmax)
/*
 * This was really badly defined; it implicitly uses mnode_maxmrange[],
 * which is a static in vm_pagelist.c.
 */
extern int mtype_2_mrange(int);
/*
 * this structure is used for walking free page lists; it
 * controls when to split large pages into smaller pages,
 * and when to coalesce smaller pages into larger pages
 */
typedef struct page_list_walker {
	uint_t plw_ceq_dif;	/* number of different color groups to check */
} page_list_walker_t;
/*
 * Page freelists are organized as two freelist types, user and kernel, each
 * with its own policy and allocation routines. The definitions related to the
 * freelist type structure are grouped below.
 *
 * The page size free lists themselves are allocated dynamically with
 * dimensions [mtype][mmu_page_sizes][colors]
 *
 * mtype specifies a physical memory range with a unique mnode.
 */
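/*
 * Illustrative sketch only: how a dynamically allocated table with the
 * dimensions described above, [mtype][mmu_page_sizes][colors], can be
 * indexed. The table parameter is hypothetical; the real freelist accessors
 * are not part of this fragment.
 */
static page_t *
freelist_head_sketch(page_t ****freelists, int mtype, uchar_t szc, uint_t color)
{
	/* head of the free list for this memory type / page size / color */
	return (freelists[mtype][szc][color]);
}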
#define MAX_PFLT_POLICIES 3
#define MAX_PFLT_TYPE 2
/*
* The kernel only needs a small number of page colors, far fewer than user
* programs.
*/
#define KFLT_PAGE_COLORS 16
typedef struct page_freelist_type page_freelist_type_t;
extern page_freelist_type_t flt_user;
extern page_freelist_type_t flt_kern;
extern page_freelist_type_t *ufltp;
extern page_freelist_type_t *kfltp;
void kflt_expand(void);
struct page_freelist_type {
int pflt_type; /* type is user or kernel */
int pflt_num_policies; /* the number of policy routines */
/*
* the policy routines are called by the allocator routine
* to implement the actual allocation policies.
*/
};
#if defined(__amd64) && !defined(__xpv)
/* (kernel freelist dispatch macros elided) */
#else /* __amd64 && ! __xpv */
#endif /* __amd64 && ! __xpv */
#define PAGE_GET_FREELISTS_POLICY(fp, i) \
(fp->pflt_policy[i])
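/*
 * Illustrative sketch (hypothetical names; the real policy function
 * signature is not shown in this fragment): an allocation routine can try
 * each of a freelist type's policy routines in turn, in the spirit of
 * PAGE_GET_FREELISTS_POLICY() above, until one of them returns a page.
 * The policy arguments are assumed to be (mtype, szc, color).
 */
typedef page_t *(*pflt_policy_sketch_fn_t)(int, uchar_t, uint_t);

typedef struct pflt_sketch {
	int pfs_num_policies;
	pflt_policy_sketch_fn_t pfs_policy[MAX_PFLT_POLICIES];
} pflt_sketch_t;

static page_t *
pflt_alloc_sketch(pflt_sketch_t *fp, int mtype, uchar_t szc, uint_t color)
{
	page_t *pp;
	int i;

	/* walk the policy routines in order; the first one to succeed wins */
	for (i = 0; i < fp->pfs_num_policies; i++) {
		if ((pp = fp->pfs_policy[i](mtype, szc, color)) != NULL)
			return (pp);
	}
	return (NULL);
}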
/*
* For now there is only a single size cache list. Allocated dynamically.
* dimensions [mtype][colors]
*
* mtype specifies a physical memory range with a unique mnode.
*/
extern page_t ***page_cachelists;
/*
* There are mutexes for the user page freelist, the kernel page freelist
* and the page cachelist. We want enough locks to make contention
* reasonable, but not too many -- otherwise page_freelist_lock() gets
* so expensive that it becomes the bottleneck!
*/
#define NPC_MUTEX 16
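/*
 * Illustrative sketch of the lock striping (assumptions only; the real
 * mutex arrays and selection macros are not shown in this fragment): a
 * freelist bin can be hashed onto one of the NPC_MUTEX locks so that
 * unrelated bins rarely contend for the same lock.
 */
static kmutex_t *
pc_bin_mutex_sketch(kmutex_t *mutexes, uint_t bin)
{
	/* NPC_MUTEX is a power of two, so masking selects a stripe */
	return (&mutexes[bin & (NPC_MUTEX - 1)]);
}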
/*
* The kflt_disable variable is used to determine whether the kernel freelist
* is supported on this platform.
*/
extern int kflt_disable;
/* flag used by the kflt_export function when calling page_promote */
#define PC_KFLT_EXPORT 0x4
extern void kflt_evict_wakeup(void);
extern void kflt_freemem_add(pgcnt_t);
extern void kflt_freemem_sub(pgcnt_t);
/* mem node iterator is not used on x86 */
#define MEM_NODE_ITERATOR_DECL(it)
/*
* interleaved_mnodes mode is never set on x86, therefore,
* simply return the limits of the given mnode, which then
* determines the length of hpm_counters array for the mnode.
*/
#define HPM_COUNTERS_LIMITS(mnode, physbase, physmax, first) \
	{ \
		(physbase) = mem_node_config[(mnode)].physbase; \
		(physmax) = mem_node_config[(mnode)].physmax; \
		(first) = (mnode); \
	}
#define PAGE_CTRS_WRITE_LOCK(mnode) \
	{ \
		rw_enter(&page_ctrs_rwlock[(mnode)], RW_WRITER); \
		page_freelist_lock(mnode); \
	}
#define PAGE_CTRS_WRITE_UNLOCK(mnode) \
	{ \
		page_freelist_unlock(mnode); \
		rw_exit(&page_ctrs_rwlock[(mnode)]); \
	}
/*
* macro to call page_ctrs_adjust() when memory is added
* during a DR operation.
*/
#define PAGE_CTRS_ADJUST(pfn, cnt, rv) { \
	spgcnt_t _cnt = (spgcnt_t)(cnt); \
	int _mn; \
	pgcnt_t _np; \
	pfn_t _pfn = (pfn); \
	pfn_t _endpfn = _pfn + _cnt; \
	while (_pfn < _endpfn) { \
		_mn = PFN_2_MEM_NODE(_pfn); \
		_np = MIN(_endpfn, mem_node_config[_mn].physmax + 1) - _pfn; \
		_pfn += _np; \
		if ((rv = page_ctrs_adjust(_mn)) != 0) \
			break; \
	} \
}
#define PFN_2_COLOR(pfn, szc, it) \
	(((pfn) & page_colors_mask) >> \
	(hw_page_array[szc].hp_shift - hw_page_array[0].hp_shift))
#define PNUM_SHIFT(szc) \
	(hw_page_array[szc].hp_shift - hw_page_array[0].hp_shift)
#define PAGE_GET_SHIFT(szc) \
	(hw_page_array[szc].hp_shift)
#define PAGE_GET_PAGECOLORS(szc) \
	(hw_page_array[szc].hp_colors)
/*
* This macro calculates the next sequential pfn with the specified
* color using color equivalency mask
*/
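/*
 * Illustrative sketch only (the original macro is not shown in this
 * fragment): given a pfn, the shift from base pages to szc-sized pages, a
 * requested color, its equivalency mask, and the full color mask, compute a
 * nearby pfn that falls in the requested color equivalence class.
 */
static pfn_t
next_pfn_for_color_sketch(pfn_t pfn, uint_t bszs_shift, uint_t color,
    uint_t ceq_mask, uint_t color_mask)
{
	pfn_t spfn = pfn >> bszs_shift;		/* pfn in szc-page units */
	pfn_t stride = (pfn_t)ceq_mask + 1;	/* spacing of equivalent colors */

	if (((spfn ^ color) & ceq_mask) == 0) {
		/* already in the requested class: advance by one stride */
		spfn += stride;
	} else {
		/* otherwise overwrite the color bits with the requested color */
		spfn = (spfn & ~(pfn_t)color_mask) | color;
	}
	return (spfn << bszs_shift);
}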
/* get the color equivalency mask for the next szc */
/* get the color of the next szc */
/* Find the bin for the given page if it was of size szc */
/*
* macros to loop through the mtype range (page_get_mnode_{free,cache,any}list,
* and page_get_contig_pages)
*
* MTYPE_START sets the initial mtype. -1 if the mtype range specified does
* not contain mnode.
*
* MTYPE_NEXT sets the next mtype. -1 if there are no more valid
* mtype in the range.
*/
#define MTYPE_NEXT(mnode, mtype, flags) { \
	if (flags & PGI_MT_RANGE) { \
		mtype = mnoderanges[mtype].mnr_next; \
		while (mtype != -1 && mnoderanges[mtype].mnr_mnode != mnode) \
			mtype = mnoderanges[mtype].mnr_next; \
	} else { \
		mtype = -1; \
	} \
}
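/*
 * Typical usage sketch (not original code): callers seed mtype with
 * MTYPE_START() and then loop, e.g.
 *
 *	while (mtype != -1) {
 *		(search the page lists for this mtype)
 *		MTYPE_NEXT(mnode, mtype, flags);
 *	}
 *
 * MTYPE_START itself is not shown in this fragment.
 */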
#ifdef DEBUG
#else
#endif
#define FULL_REGION_CNT(rg_szc) \
	(LEVEL_SIZE(rg_szc) >> LEVEL_SHIFT(rg_szc - 1))
/* Return the leader for this mapping size */
/* Return the root page for this page based on p_szc */
/*
* The counter base must be per page_counter element to prevent
* races when re-indexing, and the base page size element should
* be aligned on a boundary of the given region size.
*
* We also round up the number of pages spanned by the counters
* for a given region to PC_BASE_ALIGN in certain situations to simplify
* the coding for some non-performance critical routines.
*/
/*
* cpu/mmu-dependent vm variables
*/
extern uint_t mmu_page_sizes;
extern uint_t mmu_exported_page_sizes;
/*
* page sizes that legacy applications can see via getpagesizes(3c).
* Used to prevent legacy applications from inadvertently using the
* 'new' large pagesizes (1g and above).
*/
extern uint_t mmu_legacy_page_sizes;
/* For x86, userszc is the same as the kernel's szc */
/*
* for hw_page_map_t, sized to hold the ratio of large page to base
* pagesize (1024 max)
*/
typedef short hpmctr_t;
/*
 * get the setsize of the current cpu - assume homogeneous for x86
 */
extern int l2cache_sz, l2cache_linesz, l2cache_assoc;

#define L2CACHE_ALIGN l2cache_linesz
#define L2CACHE_ALIGN_MAX 64
#define CPUSETSIZE() \
	(l2cache_sz / l2cache_assoc)
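/*
 * For example (illustrative numbers only): a 512KB, 8-way set-associative L2
 * has a 64KB set size, i.e. 16 page colors with 4KB base pages.
 */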
/*
* Return the log2(pagesize(szc) / MMU_PAGESIZE) --- or the shift count
* for the number of base pages in this pagesize
*/
/*
* Internal PG_ flags.
*/
/*
* PGI range flags - should not overlap PGI flags
*/
/* Flag to avoid allocating a page in page_import_kflt() */
#define PGI_NOPGALLOC 0x10000000
/*
* Maximum and default values for user heap, stack, private and shared
* anonymous memory, and user text and initialized data.
* Used by map_pgsz*() routines.
*/
extern size_t max_uheap_lpsize;
extern size_t default_uheap_lpsize;
extern size_t max_ustack_lpsize;
extern size_t default_ustack_lpsize;
extern size_t max_privmap_lpsize;
extern size_t max_uidata_lpsize;
extern size_t max_utext_lpsize;
extern size_t max_shm_lpsize;
extern size_t mcntl0_lpsize;
/*
* Sanity control. Don't use large pages regardless of user
* settings if there's less than priv or shm_lpg_min_physmem memory installed.
* The units for this variable are 8K pages.
*/
extern pgcnt_t privm_lpg_min_physmem;
extern pgcnt_t shm_lpg_min_physmem;
/*
 * hash the address space pointer (as) and addr to get a bin.
 */
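/*
 * Illustrative sketch only (the original macro is not shown in this
 * fragment): hash the address space pointer and the virtual address into a
 * bin so that the same virtual address in different processes does not
 * always land on the same color. Folding kernel freelist requests into the
 * much smaller KFLT_PAGE_COLORS range is an assumption here.
 */
static uint_t
as_addr_to_bin_sketch(void *as, caddr_t addr, uint_t colors_mask, int is_kflt)
{
	uintptr_t hash;

	/* mix the page number of the address with the as pointer */
	hash = ((uintptr_t)addr >> PAGESHIFT) + ((uintptr_t)as >> 4);
	if (is_kflt)
		return ((uint_t)(hash & (KFLT_PAGE_COLORS - 1)));
	return ((uint_t)(hash & colors_mask));
}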
/*
* cpu private vm data - accessed thru CPU->cpu_vm_data
* vc_pnum_memseg: tracks last memseg visited in page_numtopp_nolock()
* vc_pnext_memseg: tracks last memseg visited in page_nextn()
* vc_kmptr: original unaligned kmem pointer for this vm_cpu_data_t
* vc_kmsize: original kmem size for this vm_cpu_data_t
*/
typedef struct {
struct memseg *vc_pnum_memseg;
struct memseg *vc_pnext_memseg;
void *vc_kmptr;
size_t vc_kmsize;
} vm_cpu_data_t;

/* allocation size to ensure vm_cpu_data_t resides in its own cache line */
#define VM_CPU_DATA_PADSIZE \
	(P2ROUNDUP(sizeof (vm_cpu_data_t), L2CACHE_ALIGN_MAX))
/* for boot cpu before kmem is initialized */
extern char vm_cpu_data0[];
/*
* When a bin is empty, and we can't satisfy a color request correctly,
* we scan. If we assume that the programs have reasonable spatial
* behavior, then it will not be a good idea to use the adjacent color.
* Using the adjacent color would result in virtually adjacent addresses
* mapping into the same spot in the cache. So, if we stumble across
* an empty bin, skip a bunch before looking. After the first skip,
* then just look one bin at a time so we don't miss our cache on
* every look. Be sure to check every bin. Page_create() will panic
* if we miss a page.
*
* This also explains the `<=' in the for loops in both page_get_freelist()
* and page_get_cachelist(). Since we checked the target bin, skipped
* a bunch, then continued one at a time, we wind up checking the target bin
* twice to make sure we get all of the bins.
*/
#define BIN_STEP 19
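/*
 * Illustrative sketch of the scan order described above (not the original
 * page_get_freelist() code): start at the target bin, jump BIN_STEP bins
 * after the first miss, then continue one bin at a time; the '<=' bound
 * revisits the target bin so that no bin is skipped.
 */
static page_t *
scan_bins_sketch(uint_t target_bin, uint_t page_colors,
    page_t *(*try_bin)(uint_t))
{
	page_t *pp;
	uint_t bin = target_bin;
	uint_t i;

	for (i = 0; i <= page_colors; i++) {
		if ((pp = try_bin(bin)) != NULL)
			return (pp);
		/* skip a bunch after the first miss, then go one at a time */
		bin += (i == 0) ? BIN_STEP : 1;
		bin %= page_colors;
	}
	return (NULL);
}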
#ifdef VM_STATS
struct vmm_vmstats_str {
/* page_get_uflt and page_get_kflt */
/* page coalesce counter */
/* candidates useful */
/* ctrs changed after locking */
/* page_freelist_coalesce failed */
};
extern struct vmm_vmstats_str vmm_vmstats;
#endif /* VM_STATS */
extern size_t page_ctrs_sz(void);
extern void page_ctr_sub(int, int, page_t *, int);
extern void page_freelist_coalesce_all(int);
#ifdef __cplusplus
}
#endif
#endif /* _VM_DEP_H */