vm_dep.h revision 06fb6a368cb1af862cff62b9a1fd89171e9ac63a
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* UNIX machine dependent virtual memory support.
*/
#ifndef _VM_DEP_H
#define _VM_DEP_H
#pragma ident "%Z%%M% %I% %E% SMI"
#ifdef __cplusplus
extern "C" {
#endif
#include <vm/hat_sfmmu.h>
#include <sys/archsystm.h>
/*
* Do not use this function for obtaining clock tick. This
 * is called by callers who do not need to have a guaranteed
* correct tick value. The proper routine to use is tsc_read().
*/
/*
* Per page size free lists. Allocated dynamically.
*/
/*
 * Memory types (mtype): relocatable pages vs. non-relocatable (cage)
 * pages; MTYPE_NORELOC is the pool drawn from when PG_NORELOC is set
 * (see the kcage fallback logic below).
 */
#define MTYPE_RELOC 0
#define MTYPE_NORELOC 1
/* mtype init for page_get_replacement_page */
/*
 * candidate counters in vm_pagelist.c are indexed by color and range
 */
#define MAX_MNODE_MRANGES MAX_MEM_TYPES	/* one mrange per memory type */
/*
 * Internal PG_ flags.
 */
/*
 * PGI mtype flags - should not overlap PGI flags
 */
/*
 * couple of locks. There are mutexes for both the page freelist
 * and the page cachelist. We want enough locks to make contention
 * reasonable, but not too many -- otherwise page_freelist_lock() gets
 * so expensive that it becomes the bottleneck!
 */
#define NPC_MUTEX 16	/* number of freelist/cachelist mutexes per class */
/*
* Iterator provides the info needed to convert RA to PA.
* MEM_NODE_ITERATOR_INIT() should be called before
* PAGE_NEXT_PFN_FOR_COLOR() if pfn was not obtained via a previous
* PAGE_NEXT_PFN_FOR_COLOR() call. Iterator caches color 2 hash
* translations requiring initializer call if color or ceq_mask changes,
* even if pfn doesn't. MEM_NODE_ITERATOR_INIT() must also be called before
* PFN_2_COLOR() that uses a valid iterator argument.
*/
#ifdef sun4v
typedef struct mem_node_iterator {
int mi_init; /* set to 1 when first init */
int mi_last_mblock; /* last mblock visited */
#define MEM_NODE_ITERATOR_DECL(it) \
mem_node_iterator_t *, int);
extern int interleaved_mnodes;
#else /* sun4v */
#define MEM_NODE_ITERATOR_DECL(it) \
#endif /* sun4v */
/*
 * Return the mnode limits so that hpm_counters length and base
* index can be determined. When interleaved_mnodes is set, we
* create an array only for the first mnode that exists. All other
* mnodes will share the array in this case.
* If interleaved_mnodes is not set, simply return the limits for
* the given mnode.
*/
if (!interleaved_mnodes) { \
} else if ((first) < 0) { \
}
#define PAGE_CTRS_WRITE_LOCK(mnode) \
if (!interleaved_mnodes) { \
} else { \
/* changing shared hpm_counters */ \
int _i; \
page_freelist_lock(_i); \
} \
}
#define PAGE_CTRS_WRITE_UNLOCK(mnode) \
if (!interleaved_mnodes) { \
} else { \
int _i; \
} \
}
/*
 * cpu specific color conversion functions.
 *
 * These hooks are declared weak so a platform/cpu module may optionally
 * supply them; when no strong definition exists, the symbol's address is
 * NULL, which the dispatching macros below test (the "&fn != NULL" /
 * "&fn == NULL" checks) before calling the hook.
 */
#pragma weak page_get_nsz_color_mask_cpu
#pragma weak page_get_nsz_color_cpu
#pragma weak page_get_color_shift_cpu
#pragma weak page_convert_color_cpu
#pragma weak page_next_pfn_for_color_cpu
#pragma weak page_pfn_2_color_cpu
((&page_get_color_shift_cpu != NULL) ? \
((&page_convert_color_cpu != NULL) ? \
((&page_pfn_2_color_cpu != NULL) ? \
hw_page_array[0].hp_shift)))
#define PNUM_SHIFT(szc) \
#define PAGE_GET_SHIFT(szc) \
#define PAGE_GET_PAGECOLORS(szc) \
/*
* This macro calculates the next sequential pfn with the specified
* color using color equivalency mask
*/
{ \
if (&page_next_pfn_for_color_cpu == NULL) { \
} else { \
pfn_shift; \
} \
} else { \
} \
}
/* get the color equivalency mask for the next szc */
((&page_get_nsz_color_mask_cpu == NULL) ? \
/* get the color of the next szc */
((&page_get_nsz_color_cpu == NULL) ? \
/* Find the bin for the given page if it was of size szc */
/*
* this structure is used for walking free page lists
* controls when to split large pages into smaller pages,
* and when to coalesce smaller pages into larger pages
*/
typedef struct page_list_walker {
/* to check */
/* element type of the hpm_counters arrays — NOTE(review): confirm width against the original file */
typedef char hpmctr_t;
#ifdef DEBUG
#else
#endif
/*
* page list count per mnode and type.
*/
typedef struct {
#ifdef DEBUG
struct {
int plc_mts_colors;
#endif
#ifdef DEBUG
int szc; \
} \
}
#define PLCNT_INIT(base) { \
plc_mts_colors = colors; \
} \
} \
} \
}
if (flags & PG_CACHE_LIST) \
else if (szc) \
else \
cnt); \
}
#else
#define PLCNT_INIT(base)
/* PG_FREE_LIST may not be explicitly set in flags for large pages */
if (flags & PG_CACHE_LIST) \
else if (szc) \
else \
}
#endif
}
}
/*
* macros to update page list max counts - done when pages transferred
* from RELOC to NORELOC mtype (kcage_init or kcage_assimilate_page).
*/
#define PLCNT_XFER_NORELOC(pp) { \
}
/*
* macro to modify the page list max counts when memory is added to
* the page lists during startup (add_physmem) or during a DR operation
* when memory is added (kphysm_add_memory_dynamic) or deleted
* (kphysm_del_cleanup).
*/
int _mn; \
if (&plat_mem_node_intersect_range != NULL) { \
if (_np == 0) \
continue; \
} \
} else { \
_pfn; \
} \
} \
}
#define MNODE_PGCNT(mn) \
/*
* macros to loop through the mtype range - MTYPE_START returns -1 in
*/
} \
}
/*
* if allocation from the RELOC pool failed and there is sufficient cage
* memory, attempt to allocate from the NORELOC pool.
*/
(kcage_freemem >= kcage_lotsfree)) { \
mtype = -1; \
} else { \
mtype = MTYPE_NORELOC; \
flags |= PG_NORELOC; \
} \
} else { \
mtype = -1; \
} \
}
/*
* get the ecache setsize for the current cpu.
*/
/*
* For sfmmu each larger page is 8 times the size of the previous
* size page.
*/
/*
* The counter base must be per page_counter element to prevent
* races when re-indexing, and the base page size element should
* be aligned on a boundary of the given region size.
*
* We also round up the number of pages spanned by the counters
* for a given region to PC_BASE_ALIGN in certain situations to simplify
* the coding for some non-performance critical routines.
*/
extern int ecache_alignsize;		/* ecache alignment used for L2CACHE_ALIGN */
#define L2CACHE_ALIGN ecache_alignsize
#define L2CACHE_ALIGN_MAX 512		/* static upper bound on L2CACHE_ALIGN */
/* NOTE(review): presumably recomputes a child's page color base after fork — confirm */
extern int update_proc_pgcolorbase_after_fork;
extern int consistent_coloring;		/* page coloring policy selector; see AS_2_BIN comment */
/* virtual address cache (vac) parameters used for color alignment in AS_2_BIN */
extern uint_t vac_colors_mask;
extern int vac_size;
extern int vac_shift;
/*
* Kernel mem segment in 64-bit space
*/
extern int kmem64_alignsize, kmem64_szc;	/* alignment and page size code of the kmem64 segment */
extern uint64_t kmem64_pabase;			/* physical base address of the kmem64 segment */
/* NOTE(review): largest tte size code usable for boot-time large pages — confirm */
extern int max_bootlp_tteszc;
/*
* Maximum and default values for user heap, stack, private and shared
* anonymous memory, and user text and initialized data.
*
* Initial values are defined in architecture specific mach_vm_dep.c file.
* Used by map_pgsz*() routines.
*/
/* user heap */
extern size_t max_uheap_lpsize;
extern size_t default_uheap_lpsize;
/* user stack */
extern size_t max_ustack_lpsize;
extern size_t default_ustack_lpsize;
/* private mappings, initialized data, text, and shared anonymous memory */
extern size_t max_privmap_lpsize;
extern size_t max_uidata_lpsize;
extern size_t max_utext_lpsize;
extern size_t max_shm_lpsize;
/*
* For adjusting the default lpsize, for DTLB-limited page sizes.
*/
/*
* Sanity control. Don't use large pages regardless of user
* settings if there's less than priv or shm_lpg_min_physmem memory installed.
* The units for this variable are 8K pages.
*/
/* minimum installed memory (in 8K pages) before large pages are used; see comment above */
extern pgcnt_t privm_lpg_min_physmem;
extern pgcnt_t shm_lpg_min_physmem;
/*
* AS_2_BIN macro controls the page coloring policy.
* 0 (default) uses various vaddr bits
* 1 virtual=paddr
* 2 bin hopping
*/
switch (consistent_coloring) { \
default: \
"AS_2_BIN: bad consistent coloring value"); \
/* assume default algorithm -> continue */ \
case 0: { \
int slew = 0; \
\
\
(vac_shift - MMU_PAGESHIFT)); \
} else { \
} \
break; \
} \
case 1: \
break; \
case 2: { \
\
/* make sure physical color aligns with vac color */ \
while ((cnt & vac_colors_mask) != \
addr_to_vcolor(addr)) { \
cnt++; \
} \
/* update per as page coloring fields */ \
} \
break; \
} \
} \
/*
* cpu private vm data - accessed thru CPU->cpu_vm_data
* vc_pnum_memseg: tracks last memseg visited in page_numtopp_nolock()
* vc_pnext_memseg: tracks last memseg visited in page_nextn()
* vc_kmptr: unaligned kmem pointer for this vm_cpu_data_t
 * vc_kmsize: original kmem size for this vm_cpu_data_t
*/
typedef struct {
struct memseg *vc_pnum_memseg;
struct memseg *vc_pnext_memseg;
void *vc_kmptr;
/* allocation size to ensure vm_cpu_data_t resides in its own cache line */
#define VM_CPU_DATA_PADSIZE \
/* for boot cpu before kmem is initialized */
extern char vm_cpu_data0[];
/*
* Function to get an ecache color bin: F(as, cnt, vcolor).
* the goal of this function is to:
* - to spread a processes' physical pages across the entire ecache to
* maximize its use.
* - to minimize vac flushes caused when we reuse a physical page on a
* different vac color than it was previously used.
 * - to prevent all processes from using the exact same colors and trashing each
* other.
*
* cnt is a bin ptr kept on a per as basis. As we page_create we increment
* the ptr so we spread out the physical pages to cover the entire ecache.
* The virtual color is made a subset of the physical color in order to
 * minimize virtual cache flushing.
* We add in the as to spread out different as. This happens when we
* initialize the start count value.
* sizeof(struct as) is 60 so we shift by 3 to get into the bit range
* that will tend to change. For example, on spitfire based machines
 * (vcshft == 1) contiguous as are spread by ~6 bins.
* vcshft provides for proper virtual color alignment.
* In theory cnt should be updated using cas only but if we are off by one
* or 2 it is no big deal.
* We also keep a start value which is used to randomize on what bin we
* start counting when it is time to start another loop. This avoids
 * contiguous allocations of ecache size to point to the same bin.
 * Why 3? It seems to work ok. Better than 7 or anything larger.
*/
#define PGCLR_LOOPFACTOR 3
/*
* When a bin is empty, and we can't satisfy a color request correctly,
* we scan. If we assume that the programs have reasonable spatial
* behavior, then it will not be a good idea to use the adjacent color.
* Using the adjacent color would result in virtually adjacent addresses
* mapping into the same spot in the cache. So, if we stumble across
* an empty bin, skip a bunch before looking. After the first skip,
* then just look one bin at a time so we don't miss our cache on
* every look. Be sure to check every bin. Page_create() will panic
* if we miss a page.
*
* This also explains the `<=' in the for loops in both page_get_freelist()
* and page_get_cachelist(). Since we checked the target bin, skipped
 * a bunch, then continued one at a time, we wind up checking the target bin
 * twice to make sure we get all of the bins.
*/
#define BIN_STEP 20
#ifdef VM_STATS
/*
 * vm statistics counters, updated only when VM_STATS is defined.
 * NOTE(review): the counter field declarations of this struct appear to
 * have been lost from this copy of the file — only their descriptive
 * comments remain.  Restore the fields from the original vm_dep.h.
 */
struct vmm_vmstats_str {
/* page coalesce counter */
/* candidates useful */
/* ctrs changed after locking */
/* page_freelist_coalesce failed */
};
extern struct vmm_vmstats_str vmm_vmstats;
#endif /* VM_STATS */
/*
* Used to hold off page relocations into the cage until OBP has completed
* its boot-time handoff of its resources to the kernel.
*/
extern int page_relocate_ready;	/* nonzero once the OBP handoff described above completes */
/*
* cpu/mmu-dependent vm variables may be reset at bootup.
*/
/*
 * NOTE(review): per-variable descriptions below are inferred from the
 * identifier names; confirm against the sfmmu hat implementation.
 */
extern uint_t mmu_page_sizes;		/* page sizes currently in use */
extern uint_t max_mmu_page_sizes;	/* upper bound on mmu_page_sizes */
extern uint_t mmu_hashcnt;
extern uint_t max_mmu_hashcnt;
extern size_t mmu_ism_pagesize;		/* page size used for ISM segments */
extern int mmu_exported_pagesize_mask;	/* bitmask of page sizes exported to userland */
extern uint_t mmu_exported_page_sizes;	/* count of exported page sizes */
extern uint_t szc_2_userszc[];		/* kernel size code -> exported (user) size code */
extern uint_t userszc_2_szc[];		/* exported (user) size code -> kernel size code */
/*
* Platform specific page routines
*/
/*
* platform specific large pages for kernel heap support
*/
/*
 * Initialize the kernel MMU context.  kcontextreg is the 64-bit value
 * involved — NOTE(review): exact semantics inferred from names; confirm
 * against the platform mach_sfmmu code.
 *
 * The empty parameter list "()" declared the function without a
 * prototype (obsolescent K&R form, removed in C23); "(void)" states
 * the zero-argument prototype explicitly and is call-compatible.
 */
extern void mmu_init_kcontext(void);
extern uint64_t kcontextreg;
/*
* Nucleus data page allocator routines
*/
#ifdef __cplusplus
}
#endif
#endif /* _VM_DEP_H */