/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 1998-2003 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include "merge.h"
/*
* External merge sort
*
* The following code implements the merge phase of sort(1) using a heap-based
* priority queue. Fast paths for merging two files as well as outputting a
* single file are provided.
*
* Memory footprint management
*
* The N-way fan-out of the merge phase can lead to excessive memory
* consumption if not constrained, so two mechanisms are used to regulate
* the memory footprint during the merge phase:
*
* 1. Single use memory advice. Since we proceed through each merge file in
* order, any line we have output is never required again--at least, not
* from that input file. Accordingly, we use the SOP_RELEASE_LINE()
* operation to advise that the memory backing the raw data for the stream
* up to that line is no longer of interest. (For certain classes of
* streams, this leads to an madvise(3C) call with the MADV_DONTNEED
* flag.)
*
* 2. Number of merge files. The number of merge files is constrained based
* on the amount of physical memory specified via the -S option (or deemed
* available based on an inquiry of sysconf(3C) for _SC_AVPHYS_PAGES).
* The number of merge files is calculated based on the average resident
* size of a stream that supports the SOP_RELEASE_LINE() operation; this
* number is conservative for streams that do not support this operation.
* A minimum of four subfiles will always be used, resource limits
* permitting.
*
* Temporary filespace footprint management
*
* Once the merge sort has utilized a temporary file, it may be deleted at
* close, as it's not used again and preserving the files until exit may
* compromise sort completion when limited temporary space is available.
*/
/*
* Priority queue occupancy counter. Within this file it is reset to zero
* by one queue routine and compared against zero by another to report
* whether the queue is empty.
*/
static int pq_N;
/*
* Set up the output stream ostrp according to the sort settings in S and
* return the result of SOP_OPEN_FOR_WRITE(ostrp).
*
* What is visible: a flags expression that contributes STREAM_UNIQUE when
* S->m_unique_lines is set, and a branch on S->m_output_to_stdout.
*
* NOTE(review): the name and parameter list are not present here, and the
* bodies of both branches are missing. Presumably this is
* prepare_output_stream(), which the single-file merge path calls with
* (&out_stream, S) -- confirm.
*/
static int
{
(S->m_unique_lines ? STREAM_UNIQUE : 0));
/* NOTE(review): stdout vs. named-file handling is not visible here. */
if (S->m_output_to_stdout) {
} else
return (SOP_OPEN_FOR_WRITE(ostrp));
}
/*
* NOTE(review): only a fragment of this routine is visible: a call to
* mg_coll_convert() on fields_chain whose return value is deliberately
* discarded. Given its position and the file header's mention of a fast
* path for outputting a single file, this is presumably that single-stream
* path -- confirm against the complete definition.
*/
static void
{
(void) mg_coll_convert(fields_chain,
}
}
}
/*
* NOTE(review): fragmentary definition. Presumably the two-file fast path
* mentioned in the file header (the only visible stream is named str_a,
* suggesting a second one) -- confirm.
*
* What is visible:
* - a choice keyed on whether str_a->s_element_size equals sizeof (char);
* what each arm selects is not shown;
* - several early returns ahead of the main loop;
* - an unbounded loop whose control flow depends on a comparison, taking
* coll_flags as its last argument, being negative or not.
*/
static void
{
if (str_a->s_element_size == sizeof (char))
else
return;
return;
}
return;
}
/* Loop until one of the break paths below is taken. */
for (;;) {
coll_flags) < 0) {
break;
}
continue;
} else {
break;
}
continue;
}
}
}
}
/*
* priority queue routines
* used for merges involving more than two sources
*/
/*
* Walk index k toward the front of the queue array: while k is greater
* than 1 and a comparison (last argument coll_flags) yields a positive
* result, k is halved.
*
* NOTE(review): this looks like the sift-up ("swim") step of a 1-based
* binary heap, where the parent of k is k / 2. The comparison operands and
* any element exchange are not visible here -- confirm.
*/
static void
{
while (k > 1 &&
coll_flags) > 0) {
k /= 2;
}
}
/*
* Walk index k away from the front of the queue array: while 2 * k is
* still within N, j is set to 2 * k, possibly advanced to 2 * k + 1 based
* on a pq_coll_fcn() comparison involving A[j]->s_current, and the walk
* stops as soon as a comparison (last argument coll_flags) is <= 0.
* Otherwise k becomes j and the loop repeats.
*
* NOTE(review): this looks like the sift-down ("sink") step of a 1-based
* binary heap with children at 2k and 2k + 1. The second operand of the
* first comparison, the whole of the second comparison's call, and any
* element exchange are not visible here -- confirm.
*/
static void
{
int j;
while (2 * k <= N) {
j = 2 * k;
/* j < N guarantees that index j + 1 is still within 1..N. */
if (j < N && pq_coll_fcn(&A[j]->s_current,
j++;
coll_flags) <= 0)
break;
k = j;
}
}
/*
* Report whether the priority queue is empty.
*
* Returns non-zero when pq_N is zero, and zero otherwise.
*
* NOTE(review): the name is not present here; presumably this is the
* pqueue_empty() used as the loop condition of the N-way merge -- confirm.
*/
static int
{
return (pq_N == 0);
}
/*
* Reset the priority queue's occupancy counter, pq_N, to zero.
*
* NOTE(review): only the counter reset is visible; whether the queue
* storage or comparison function is also set up here cannot be told from
* this view.
*/
static void
{
pq_N = 0;
}
/*
* NOTE(review): neither the name nor the body of this routine is present
* here. Sitting among the priority queue routines and returning nothing,
* it is presumably the queue insertion routine -- confirm against the
* complete definition.
*/
static void
{
}
/*
* NOTE(review): neither the name nor the body of this routine is present
* here. It returns a stream_t pointer and sits among the priority queue
* routines, so presumably it yields the stream at the head of the queue --
* confirm against the complete definition.
*/
static stream_t *
{
}
/*
* Merge driver built on the priority queue. It runs as repeated passes of
* an outer loop, and leaves that loop once bot_streamp is NULL after a
* pass's output has been flushed.
*
* The visible steps of one pass are:
* 1. Walk forward from bot_streamp, counting streams in n_opens, and stop
* when the count exceeds threshold_opens (or a second condition that is
* not visible here holds).
* 2. Select the output stream for the pass; the choice depends on whether
* bot_streamp is NULL.
* 3. Skip streams for which SOP_EOS() is true, and run the first line of
* the others through mg_coll_convert().
* 4. Drain the priority queue, running mg_coll_convert() for each stream
* that has not reached SOP_EOS().
* 5. Free and close the streams used by the pass, then flush the output.
*
* NOTE(review): the name, parameter list and many statements (including
* the assignment of threshold_opens and all queue insert/remove calls) are
* not present here; the summary covers only what is visible.
*/
static void
{
/* Streams counted in the current pass; reset to zero after each pass. */
int n_opens = 0;
/* Limit that n_opens is compared against while opening streams. */
int threshold_opens;
for (;;) {
while (bot_streamp != NULL) {
if (n_opens > threshold_opens ||
/*
* Available file descriptors would exceed
* memory target or have been exhausted; back
* off to the last valid, primed stream.
*/
break;
}
n_opens++;
}
if (bot_streamp == NULL) {
else
} else {
(is_single_byte ? 0 : ST_WIDE));
if (loop_out_streamp == NULL ||
/*
* We need three file descriptors to make
* progress; if top_streamp == bot_streamp, then
* we have only two.
*/
}
/*
* Empty stream?
*/
SOP_EOS(cur_streamp)) {
continue;
}
/*
* Given that stream is not empty, any error in priming
* must be fatal.
*/
(void) mg_coll_convert(S->m_fields_head,
S->m_field_separator);
}
/* Drain the queue; streams not yet at SOP_EOS() get another line. */
while (!pqueue_empty()) {
if (!SOP_EOS(cur_streamp)) {
(void) mg_coll_convert(S->m_fields_head,
S->m_field_separator);
}
}
/* Free and close streams until cur_streamp reaches bot_streamp. */
while (cur_streamp != bot_streamp) {
(void) SOP_FREE(cur_streamp);
(void) SOP_CLOSE(cur_streamp);
}
(void) SOP_FLUSH(loop_out_streamp);
/* Nothing left beyond this pass: leave the outer loop. */
if (bot_streamp == NULL)
break;
(void) SOP_CLOSE(loop_out_streamp);
/*
* Get file size so that we may treat intermediate files
* with our stream_mmap facilities.
*/
}
n_opens = 0;
}
}
/*
* Externally visible entry point for the merge phase.
*
* When S->m_merge_only is set, the streams to merge are taken from
* S->m_input_streams; otherwise they are the temporary output files left
* by the internal sort. coll_flags is zero unless
* FIELD_REVERSE_COMPARISONS is set in S->m_field_options. The work is then
* dispatched on n_merges:
* 0 nothing to do;
* 1 single-file fast path: open the input for reading, prepare the
* output stream, and hand off;
* 2 two-file fast path;
* more full merge.
*
* NOTE(review): the name, parameter list and several statements (how
* n_merges is counted, the S->m_entire_line handling, and the calls made
* in each case) are not present here.
*/
void
{
if (S->m_merge_only) {
merge_chain = S->m_input_streams;
} else {
/*
* Otherwise we're inheriting the temporary output files from
* our internal sort.
*/
}
/* The value used when reverse comparisons are requested is not visible. */
if (S->m_field_options & FIELD_REVERSE_COMPARISONS)
else
coll_flags = 0;
if (S->m_entire_line)
switch (n_merges) {
case 0:
/*
* No files for merge.
*/
break;
case 1:
/*
* Fast path: only one file for merge.
*/
(void) stream_open_for_read(S, cur_streamp);
(void) prepare_output_stream(&out_stream, S);
&out_stream, S->m_field_separator);
break;
case 2:
/*
* Fast path: only two files for merge.
*/
(void) stream_open_for_read(S, cur_streamp);
S->m_field_separator, coll_flags);
break;
default:
/*
* Full merge.
*/
break;
}
}