src/modules/indexer.py

#!usr/bin/python
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
#

import errno
import os
import platform
import shutil
from six.moves.urllib.parse import unquote

import pkg.fmri as fmri
import pkg.misc as misc
import pkg.lockfile as lockfile
import pkg.manifest as manifest
import pkg.portable as portable
import pkg.search_storage as ss
import pkg.search_errors as search_errors
import pkg.version
import pkg.client.progress as progress
from pkg.misc import EmptyI, PKG_DIR_MODE, PKG_FILE_BUFSIZ

# Constants for indicating whether pkgplans or fmri-manifest path pairs are
# used as arguments.
IDX_INPUT_TYPE_PKG = 0
IDX_INPUT_TYPE_FMRI = 1

INITIAL_VERSION_NUMBER = 1

FILE_OPEN_TIMEOUT_SECS = 2

MAX_FAST_INDEXED_PKGS = 20

SORT_FILE_PREFIX = "sort."

SORT_FILE_MAX_SIZE = 128 * 1024 * 1024


def makedirs(pathname):
        """Create a directory at the specified location if it does not
        already exist (including any parent directories).
        """

        try:
                os.makedirs(pathname, PKG_DIR_MODE)
        except EnvironmentError as e:
                if e.filename == pathname and (e.errno == errno.EEXIST or
                    os.path.exists(e.filename)):
                        return
                elif e.errno in (errno.EACCES, errno.EROFS):
                        raise search_errors.ProblematicPermissionsIndexException(
                            e.filename)
                elif e.errno != errno.EEXIST or e.filename != pathname:
                        raise


class Indexer(object):
        """Indexer is a class designed to index a set of manifests or pkg plans
        and provide a compact representation on disk, which is quickly
        searchable."""

        file_version_string = "VERSION: "

        def __init__(self, index_dir, get_manifest_func, get_manifest_path_func,
            progtrack=None, excludes=EmptyI, log=None,
            sort_file_max_size=SORT_FILE_MAX_SIZE):
                self._num_keys = 0
                self._num_manifests = 0
                self._num_entries = 0
                self.get_manifest_func = get_manifest_func
                self.get_manifest_path_func = get_manifest_path_func
                self.excludes = excludes
                self.__log = log
                self.sort_file_max_size = sort_file_max_size
                if self.sort_file_max_size <= 0:
                        raise search_errors.IndexingException(
                            _("sort_file_max_size must be greater than 0"))

                # This structure was used to gather all index files into one
                # location. If a new index structure is needed, the files can
                # be added (or removed) from here. Providing a list or
                # dictionary allows an easy approach to opening or closing all
                # index files.

                self._data_dict = {
                        "fast_add":
                            ss.IndexStoreSet(ss.FAST_ADD),
                        "fast_remove":
                            ss.IndexStoreSet(ss.FAST_REMOVE),
                        "manf":
                            ss.IndexStoreListDict(ss.MANIFEST_LIST,
                                build_function=self.__build_fmri,
                                decode_function=self.__decode_fmri),
                        "full_fmri": ss.IndexStoreSet(ss.FULL_FMRI_FILE),
                        "main_dict": ss.IndexStoreMainDict(ss.MAIN_FILE),
                        "token_byte_offset":
                            ss.IndexStoreDictMutable(ss.BYTE_OFFSET_FILE)
                        }

                self._data_fast_add = self._data_dict["fast_add"]
                self._data_fast_remove = self._data_dict["fast_remove"]
                self._data_manf = self._data_dict["manf"]
                self._data_full_fmri = self._data_dict["full_fmri"]
                self._data_main_dict = self._data_dict["main_dict"]
                self._data_token_offset = self._data_dict["token_byte_offset"]

                # This is added to the dictionary after the others because it
                # needs one of the other mappings as an input.
                self._data_dict["fmri_offsets"] = \
                    ss.InvertedDict(ss.FMRI_OFFSETS_FILE, self._data_manf)
                self._data_fmri_offsets = self._data_dict["fmri_offsets"]

                self._index_dir = index_dir
                self._tmp_dir = os.path.join(self._index_dir, "TMP")

                self.__lockfile = lockfile.LockFile(os.path.join(
                    self._index_dir, "lock"),
                    set_lockstr=lockfile.generic_lock_set_str,
                    get_lockstr=lockfile.generic_lock_get_str,
                    failure_exc=search_errors.IndexLockedException)

                self._indexed_manifests = 0
                self.server_repo = True
                self.empty_index = False
                self.file_version_number = None

                if progtrack is None:
                        self._progtrack = progress.NullProgressTracker()
                else:
                        self._progtrack = progtrack

                self._file_timeout_secs = FILE_OPEN_TIMEOUT_SECS

                self._sort_fh = None
                self._sort_file_num = 0
                self._sort_file_bytes = 0

                # The action type and key indexes, which are necessary for
                # efficient searches by type or key, store their file handles in
                # dictionaries.  File handles for actions are in at_fh, while
                # filehandles for keys are kept in st_fh.
                self.at_fh = {}
                self.st_fh = {}

                self.old_out_token = None

        @staticmethod
        def __decode_fmri(pfmri):
                """Turn fmris into strings correctly while writing out
                the fmri offsets file."""

                return pfmri.get_fmri(anarchy=True, include_scheme=False)

        @staticmethod
        def __build_fmri(s):
                """Build fmris while reading the fmri offset information."""

                return fmri.PkgFmri(s)

        @staticmethod
        def _build_version(vers):
                """ Private method for building versions from a string. """

                return pkg.version.Version(unquote(vers), None)

        def _read_input_indexes(self, directory):
                """ Opens all index files using consistent_open and reads all
                of them into memory except the main dictionary file to avoid
                inefficient memory usage."""

                res = ss.consistent_open(self._data_dict.values(), directory,
                    self._file_timeout_secs)
                pt = self._progtrack
                if res == None:
                        self.file_version_number = INITIAL_VERSION_NUMBER
                        self.empty_index = True
                        return None
                self.file_version_number = res

                try:
                        pt.job_start(pt.JOB_READ_SEARCH)
                        try:
                                for d in self._data_dict.values():
                                        if (d == self._data_main_dict or
                                                d == self._data_token_offset):
                                                pt.job_add_progress(
                                                    pt.JOB_READ_SEARCH)
                                                continue
                                        d.read_dict_file()
                                        pt.job_add_progress(pt.JOB_READ_SEARCH)
                        except:
                                self._data_dict["main_dict"].close_file_handle()
                                raise
                finally:
                        for d in self._data_dict.values():
                                if d == self._data_main_dict:
                                        continue
                                d.close_file_handle()
                        pt.job_done(pt.JOB_READ_SEARCH)

        def __close_sort_fh(self):
                """Utility fuction used to close and sort the temporary
                files used to produce a sorted main_dict file."""

                self._sort_fh.close()
                self._sort_file_bytes = 0
                tmp_file_name = os.path.join(self._tmp_dir,
                    SORT_FILE_PREFIX + str(self._sort_file_num - 1))
                tmp_fh = open(tmp_file_name, "r", buffering=PKG_FILE_BUFSIZ)
                l = [
                    (ss.IndexStoreMainDict.parse_main_dict_line_for_token(line),
                    line)
                    for line in tmp_fh
                ]
                tmp_fh.close()
                l.sort()
                tmp_fh = open(tmp_file_name, "w", buffering=PKG_FILE_BUFSIZ)
                tmp_fh.writelines((line for tok, line in l))
                tmp_fh.close()

        def _add_terms(self, pfmri, new_dict):
                """Adds tokens, and the actions generating them, to the current
                temporary sort file.

                The "pfmri" parameter is the fmri the information is coming
                from.

                The "new_dict" parameter maps tokens to the information about
                the action."""

                p_id = self._data_manf.get_id_and_add(pfmri)
                pfmri = p_id

                for tok_tup in new_dict.keys():
                        tok, action_type, subtype, fv = tok_tup
                        lst = [(action_type, [(subtype, [(fv, [(pfmri,
                            list(new_dict[tok_tup]))])])])]
                        s = ss.IndexStoreMainDict.transform_main_dict_line(tok,
                            lst)
                        if len(s) + self._sort_file_bytes >= \
                            self.sort_file_max_size:
                                self.__close_sort_fh()
                                self._sort_fh = open(os.path.join(self._tmp_dir,
                                    SORT_FILE_PREFIX +
                                    str(self._sort_file_num)), "w",
                                    buffering=PKG_FILE_BUFSIZ)
                                self._sort_file_num += 1
                        self._sort_fh.write(s)
                        self._sort_file_bytes += len(s)
                return

        def _fast_update(self, filters_pkgplan_list):
                """Updates the log of packages which have been installed or
                removed since the last time the index has been rebuilt.

                There are two axes to consider: whether the package is being
                added or removed; whether this version of the package is
                already present in an update log.

                Case 1: The package is being installed and is not in the
                    update log.  In this case, the new package is simply added
                    to the install update log.
                Case 2: The package is being installed and is in the removal
                    update log. In this case, the package is removed from the
                    remove update log.  This has the effect of exposing the
                    entries in the existing index to the user.
                Case 3: The package is being removed and is not in the
                    update log.  In this case, the new package is simply added
                    to the removed update log.
                Case 4: The package is being removed and is in the installed
                    update log.  In this case, the package is removed from the
                    install update log.

                The "filters_pkgplan_list" parameter is a tuple of a list of
                filters, which are currently ignored, and a list of pkgplans
                that indicated which versions of a package are being added or
                removed."""

                nfast_add = len(self._data_fast_add._set)
                nfast_remove = len(self._data_fast_remove._set)

                #
                # First pass determines whether a fast update makes sense and
                # updates the list of fmris that will be in the index.
                #
                filters, pkgplan_list = filters_pkgplan_list
                for p in pkgplan_list:
                        d_fmri, o_fmri = p
                        if d_fmri:
                                self._data_full_fmri.add_entity(
                                    d_fmri.get_fmri(anarchy=True))
                                d_tmp = d_fmri.get_fmri(anarchy=True,
                                    include_scheme=False)
                                if self._data_fast_remove.has_entity(d_tmp):
                                        nfast_remove -= 1
                                else:
                                        nfast_add += 1
                        if o_fmri:
                                self._data_full_fmri.remove_entity(
                                    o_fmri.get_fmri(anarchy=True))
                                o_tmp = o_fmri.get_fmri(anarchy=True,
                                    include_scheme=False)
                                if self._data_fast_add.has_entity(o_tmp):
                                        nfast_add -= 1
                                else:
                                        nfast_remove += 1

                if nfast_add > MAX_FAST_INDEXED_PKGS:
                        return False

                #
                # Second pass actually updates the fast_add and fast_remove
                # sets and updates progress.
                #
                self._progtrack.job_start(self._progtrack.JOB_UPDATE_SEARCH,
                    goal=len(pkgplan_list))
                for p in pkgplan_list:
                        d_fmri, o_fmri = p

                        if d_fmri:
                                d_tmp = d_fmri.get_fmri(anarchy=True,
                                    include_scheme=False)
                                assert not self._data_fast_add.has_entity(d_tmp)
                                if self._data_fast_remove.has_entity(d_tmp):
                                        self._data_fast_remove.remove_entity(
                                            d_tmp)
                                else:
                                        self._data_fast_add.add_entity(d_tmp)
                        if o_fmri:
                                o_tmp = o_fmri.get_fmri(anarchy=True,
                                    include_scheme=False)
                                assert not self._data_fast_remove.has_entity(
                                    o_tmp)
                                if self._data_fast_add.has_entity(o_tmp):
                                        self._data_fast_add.remove_entity(o_tmp)
                                else:
                                        self._data_fast_remove.add_entity(o_tmp)

                        self._progtrack.job_add_progress(
                            self._progtrack.JOB_UPDATE_SEARCH)

                self._progtrack.job_done(self._progtrack.JOB_UPDATE_SEARCH)

                return True

        def _process_fmris(self, fmris):
                """Takes a list of fmris and updates the internal storage to
                reflect the new packages."""

                removed_paths = []

                for added_fmri in fmris:
                        self._data_full_fmri.add_entity(
                            added_fmri.get_fmri(anarchy=True))
                        new_dict = manifest.Manifest.search_dict(
                            self.get_manifest_path_func(added_fmri),
                            self.excludes, log=self.__log)
                        self._add_terms(added_fmri, new_dict)

                        self._progtrack.job_add_progress(
                            self._progtrack.JOB_REBUILD_SEARCH)
                return removed_paths

        def _write_main_dict_line(self, file_handle, token,
            fv_fmri_pos_list_list, out_dir):
                """Writes out the new main dictionary file and also adds the
                token offsets to _data_token_offset. file_handle is the file
                handle for the output main dictionary file. token is the token
                to add to the file. fv_fmri_pos_list_list is a structure of
                lists inside of lists several layers deep. The top layer is a
                list of action types. The second layer contains the keys for
                the action type it's a sublist for. The third layer contains
                the values which matched the token for the action and key it's
                contained in. The fourth layer is the fmris which contain those
                matches. The fifth layer is the offset into the manifest of
                each fmri for each matching value. out_dir points to the
                base directory to use to write a file for each package which
                contains the offsets into the main dictionary for the tokens
                this package matches."""

                if self.old_out_token is not None and \
                    self.old_out_token >= token:
                        raise RuntimeError("In writing dict line, token:{0}, "
                            "old_out_token:{1}".format(token,
                            self.old_out_token))
                self.old_out_token = token

                cur_location_int = file_handle.tell()
                cur_location = str(cur_location_int)
                self._data_token_offset.write_entity(token, cur_location)

                for at, st_list in fv_fmri_pos_list_list:
                        self._progtrack.job_add_progress(
                            self._progtrack.JOB_REBUILD_SEARCH, nitems=0)
                        if at not in self.at_fh:
                                self.at_fh[at] = open(os.path.join(out_dir,
                                    "__at_" + at), "w")
                        self.at_fh[at].write(cur_location + "\n")
                        for st, fv_list in st_list:
                                if st not in self.st_fh:
                                        self.st_fh[st] = \
                                            open(os.path.join(out_dir,
                                            "__st_" + st), "w")
                                self.st_fh[st].write(cur_location + "\n")
                                for fv, p_list in fv_list:
                                        for p_id, m_off_set in p_list:
                                                p_id = int(p_id)
                                                self._data_fmri_offsets.add_pair(
                                                    p_id, cur_location_int)
                file_handle.write(self._data_main_dict.transform_main_dict_line(
                    token, fv_fmri_pos_list_list))

        @staticmethod
        def __splice(ret_list, source_list):
                """Takes two arguments. Each of the arguments must be a list
                with the type signature list of ('a, list of 'b). Where
                the lists share a value (A) for 'a, it splices the lists of 'b
                paired with A from each list into a single list and makes that
                the new list paired with A in the result.

                Note: This modifies the ret_list rather than building a new one
                because of the large performance difference between the two
                approaches."""

                tmp_res = []
                for val, sublist in source_list:
                        found = False
                        for r_val, r_sublist in ret_list:
                                if val == r_val:
                                        found = True
                                        Indexer.__splice(r_sublist, sublist)
                                        break
                        if not found:
                                tmp_res.append((val, sublist))
                ret_list.extend(tmp_res)

        def _gen_new_toks_from_files(self):
                """Produces a stream of ordered tokens and the associated
                information for those tokens from the sorted temporary files
                produced by _add_terms. In short, this is the merge part of the
                merge sort being done on the tokens to be indexed."""

                def get_line(fh):
                        """Helper function to make the initialization of the
                        fh_dict easier to understand."""

                        try:
                                return \
                                        ss.IndexStoreMainDict.parse_main_dict_line(
                                        next(fh))
                        except StopIteration:
                                return None

                # Build a mapping from numbers to the file handle for the
                # temporary sort file with that number.
                fh_dict = dict([
                    (i, open(os.path.join(self._tmp_dir,
                    SORT_FILE_PREFIX + str(i)), "r",
                    buffering=PKG_FILE_BUFSIZ))
                    for i in range(self._sort_file_num)
                ])

                cur_toks = {}
                # Seed cur_toks with the first token from each temporary file.
                # The line may not exist since, for a empty repo, an empty file
                # is created.
                for i in list(fh_dict.keys()):
                        line = get_line(fh_dict[i])
                        if line is None:
                                del fh_dict[i]
                        else:
                                cur_toks[i] = line

                old_min_token = None
                # cur_toks will have items deleted from it as files no longer
                # have tokens to provide. When no files have tokens, the merge
                # is done.
                while cur_toks:
                        min_token = None
                        matches = []
                        # Find smallest available token and the temporary files
                        # which contain that token.
                        for i in fh_dict.keys():
                                cur_tok, info = cur_toks[i]
                                if cur_tok is None:
                                        continue
                                if min_token is None or cur_tok < min_token:
                                        min_token = cur_tok
                                        matches = [i]
                                elif cur_tok == min_token:
                                        matches.append(i)
                        assert min_token is not None
                        assert len(matches) > 0
                        res = None
                        for i in matches:
                                new_tok, new_info = cur_toks[i]
                                assert new_tok == min_token
                                try:
                                        # Continue pulling the next tokens from
                                        # and adding them to the result list as
                                        # long as the token matches min_token.
                                        while new_tok == min_token:
                                                if res is None:
                                                        res = new_info
                                                else:
                                                        self.__splice(res,
                                                            new_info)
                                                new_tok, new_info = \
                                                    ss.IndexStoreMainDict.parse_main_dict_line(next(fh_dict[i]))
                                        cur_toks[i] = new_tok, new_info
                                except StopIteration:
                                        # When a StopIteration happens, the
                                        # last line in the file has been read
                                        # and processed. Delete all the
                                        # information associated with that file
                                        # so that we no longer check that file.
                                        fh_dict[i].close()
                                        del fh_dict[i]
                                        del cur_toks[i]
                        assert res is not None
                        if old_min_token is not None and \
                            old_min_token >= min_token:
                                raise RuntimeError("Got min token:{0} greater "
                                    "than old_min_token:{1}".format(
                                    min_token, old_min_token))
                        old_min_token = min_token
                        if min_token != "":
                                yield min_token, res
                return

        def _update_index(self, dicts, out_dir):
                """Processes the main dictionary file and writes out a new
                main dictionary file reflecting the changes in the packages.

                The "dicts" parameter is the list of fmris which have been
                removed during update.

                The "out_dir" parameter is the temporary directory in which to
                build the indexes."""

                removed_paths = dicts

                if self.empty_index:
                        file_handle = []
                else:
                        file_handle = self._data_main_dict.get_file_handle()
                        assert file_handle

                if self.file_version_number == None:
                        self.file_version_number = INITIAL_VERSION_NUMBER
                else:
                        self.file_version_number += 1

                self._data_main_dict.write_dict_file(
                    out_dir, self.file_version_number)
                # The dictionary file's opened in append mode to avoid removing
                # the version information the search storage class added.
                out_main_dict_handle = \
                    open(os.path.join(out_dir,
                        self._data_main_dict.get_file_name()), "a",
                        buffering=PKG_FILE_BUFSIZ)

                self._data_token_offset.open_out_file(out_dir,
                    self.file_version_number)

                new_toks_available = True
                new_toks_it = self._gen_new_toks_from_files()
                try:
                        tmp = next(new_toks_it)
                        next_new_tok, new_tok_info = tmp
                except StopIteration:
                        new_toks_available = False

                try:
                        for line in file_handle:
                                (tok, at_lst) = \
                                    self._data_main_dict.parse_main_dict_line(
                                    line)
                                existing_entries = []
                                for at, st_list in at_lst:
                                        st_res = []
                                        for st, fv_list in st_list:
                                                fv_res = []
                                                for fv, p_list in fv_list:
                                                        p_res = []
                                                        for p_id, m_off_set in \
                                                                    p_list:
                                                                p_id = int(p_id)
                                                                pfmri = self._data_manf.get_entity(p_id)
                                                                if pfmri not in removed_paths:
                                                                        p_res.append((p_id, m_off_set))
                                                        if p_res:
                                                                fv_res.append(
                                                                    (fv, p_res))
                                                if fv_res:
                                                        st_res.append(
                                                            (st, fv_res))
                                        if st_res:
                                                existing_entries.append(
                                                    (at, st_res))
                                # Add tokens newly discovered in the added
                                # packages which are alphabetically earlier
                                # than the token most recently read from the
                                # existing main dictionary file.
                                while new_toks_available and next_new_tok < tok:
                                        assert len(next_new_tok) > 0
                                        self._write_main_dict_line(
                                            out_main_dict_handle, next_new_tok,
                                            new_tok_info, out_dir)
                                        try:
                                                next_new_tok, new_tok_info = \
                                                    next(new_toks_it)
                                        except StopIteration:
                                                new_toks_available = False
                                                del next_new_tok
                                                del new_tok_info

                                # Combine the information about the current
                                # token from the new packages with the existing
                                # information for that token.
                                if new_toks_available and next_new_tok == tok:
                                        self.__splice(existing_entries,
                                            new_tok_info)
                                        try:
                                                next_new_tok, new_tok_info = \
                                                    next(new_toks_it)
                                        except StopIteration:
                                                new_toks_available = False
                                                del next_new_tok
                                                del new_tok_info
                                # If this token has any packages still
                                # associated with it, write them to the file.
                                if existing_entries:
                                        assert len(tok) > 0
                                        self._write_main_dict_line(
                                            out_main_dict_handle,
                                            tok, existing_entries, out_dir)

                        # For any new tokens which are alphabetically after the
                        # last entry in the existing file, add them to the end
                        # of the file.
                        while new_toks_available:
                                assert len(next_new_tok) > 0
                                self._write_main_dict_line(
                                    out_main_dict_handle, next_new_tok,
                                    new_tok_info, out_dir)
                                try:
                                        next_new_tok, new_tok_info = \
                                            next(new_toks_it)
                                except StopIteration:
                                        new_toks_available = False
                finally:
                        if not self.empty_index:
                                file_handle.close()
                                self._data_main_dict.close_file_handle()

                        out_main_dict_handle.close()
                        self._data_token_offset.close_file_handle()
                        for fh in self.at_fh.values():
                                fh.close()
                        for fh in self.st_fh.values():
                                fh.close()

                        removed_paths = []

        def _write_assistant_dicts(self, out_dir):
                """Write out the companion dictionaries needed for
                translating the internal representation of the main
                dictionary into human readable information.

                The "out_dir" parameter is the temporary directory to write
                the indexes into."""

                for d in self._data_dict.values():
                        if d == self._data_main_dict or \
                            d == self._data_token_offset:
                                continue
                        d.write_dict_file(out_dir, self.file_version_number)

        def _generic_update_index(self, inputs, input_type,
            tmp_index_dir=None, image=None):
                """Performs all the steps needed to update the indexes.

                The "inputs" parameter iterates over the fmris which have been
                added or the pkgplans for the change in the image.

                The "input_type" paramter is a value specifying whether the
                input is fmris or pkgplans.

                The "tmp_index_dir" parameter allows this function to use a
                different temporary directory than the default.

                The "image" parameter must be set if "input_type" is pkgplans.
                It allows the index to automatically be rebuilt if the number
                of packages added since last index rebuild is greater than
                MAX_ADDED_NUMBER_PACKAGES."""

                self.lock()
                try:
                        # Allow the use of a directory other than the default
                        # directory to store the intermediate results in.
                        if not tmp_index_dir:
                                tmp_index_dir = self._tmp_dir
                        assert not (tmp_index_dir == self._index_dir)

                        # Read the existing dictionaries.
                        self._read_input_indexes(self._index_dir)
                except:
                        self.unlock()
                        raise

                try:
                        # If the temporary indexing directory already exists,
                        # remove it to ensure its empty.  Since the caller
                        # should have locked the index already, this should
                        # be safe.
                        if os.path.exists(tmp_index_dir):
                                shutil.rmtree(tmp_index_dir)

                        # Create directory.
                        makedirs(os.path.join(tmp_index_dir))

                        inputs = list(inputs)
                        fast_update = False

                        if input_type == IDX_INPUT_TYPE_PKG:
                                assert image

                                #
                                # Try to do a fast update; if that fails,
                                # do a full index rebuild.
                                #
                                fast_update = self._fast_update(inputs)

                                if not fast_update:
                                        self._data_main_dict.close_file_handle()
                                        self._data_fast_add.clear()
                                        self._data_fast_remove.clear()

                                        # Before passing control to rebuild
                                        # index, the index lock must be
                                        # released.
                                        self.unlock()
                                        return self.rebuild_index_from_scratch(
                                            image.gen_installed_pkgs(),
                                            tmp_index_dir)

                        elif input_type == IDX_INPUT_TYPE_FMRI:
                                assert not self._sort_fh
                                self._sort_fh = open(os.path.join(self._tmp_dir,
                                    SORT_FILE_PREFIX +
                                    str(self._sort_file_num)), "w")
                                self._sort_file_num += 1

                                self._progtrack.job_start(
                                    self._progtrack.JOB_REBUILD_SEARCH,
                                    goal=len(inputs))
                                dicts = self._process_fmris(inputs)
                                # Update the main dictionary file
                                self.__close_sort_fh()
                                self._update_index(dicts, tmp_index_dir)
                                self._progtrack.job_done(
                                    self._progtrack.JOB_REBUILD_SEARCH)

                                self.empty_index = False
                        else:
                                raise RuntimeError(
                                    "Got unknown input_type: {0}", input_type)

                        # Write out the helper dictionaries
                        self._write_assistant_dicts(tmp_index_dir)

                        # Move all files from the tmp directory into the index
                        # dir. Note: the need for consistent_open is that
                        # migrate is not an atomic action.
                        self._migrate(source_dir = tmp_index_dir,
                            fast_update=fast_update)
                        self.unlock()

                except:
                        self.unlock()
                        raise
                finally:
                        self._data_main_dict.close_file_handle()

        def client_update_index(self, pkgplan_list, image, tmp_index_dir=None):
                """This version of update index is designed to work with the
                client side of things.  Specifically, it expects a pkg plan
                list with added and removed FMRIs/manifests.  Note: if
                tmp_index_dir is specified, it must NOT exist in the current
                directory structure.  This prevents the indexer from
                accidentally removing files.  Image the image object. This is
                needed to allow correct reindexing from scratch to occur."""

                self._generic_update_index(pkgplan_list, IDX_INPUT_TYPE_PKG,
                    tmp_index_dir=tmp_index_dir, image=image)

        def server_update_index(self, fmris, tmp_index_dir=None):
                """ This version of update index is designed to work with the
                server side of things. Specifically, since we don't currently
                support removal of a package from a repo, this function simply
                takes a list of FMRIs to be added to the repot.  Currently, the
                only way to remove a package from the index is to remove it
                from the depot and reindex.  Note: if tmp_index_dir is
                specified, it must NOT exist in the current directory structure.
                This prevents the indexer from accidentally removing files."""

                self._generic_update_index(fmris, IDX_INPUT_TYPE_FMRI,
                    tmp_index_dir)

        def check_index_existence(self):
                """ Returns a boolean value indicating whether a consistent
                index exists. If an index exists but is inconsistent, an
                exception is raised."""

                try:
                        try:
                                res = \
                                    ss.consistent_open(self._data_dict.values(),
                                        self._index_dir,
                                        self._file_timeout_secs)
                        except (KeyboardInterrupt,
                            search_errors.InconsistentIndexException):
                                raise
                        except Exception:
                                return False
                finally:
                        for d in self._data_dict.values():
                                d.close_file_handle()
                assert res is not 0
                return res

        def rebuild_index_from_scratch(self, fmris, tmp_index_dir=None):
                """Removes any existing index directory and rebuilds the
                index based on the fmris and manifests provided as an
                argument.

                The "tmp_index_dir" parameter allows for a different directory
                than the default to be used."""

                self.file_version_number = INITIAL_VERSION_NUMBER
                self.empty_index = True

                # A lock can't be held while the index directory is being
                # removed as that can cause rmtree() to fail when using
                # NFS.  As such, attempt to get the lock first, then
                # unlock, immediately rename the old index directory,
                # and then remove the old the index directory and
                # create a new one.
                self.lock()
                self.unlock()

                portable.rename(self._index_dir, self._index_dir + ".old")
                try:
                        shutil.rmtree(self._index_dir + ".old")
                        makedirs(self._index_dir)
                except OSError as e:
                        if e.errno == errno.EACCES:
                                raise search_errors.ProblematicPermissionsIndexException(
                                    self._index_dir)

                self._generic_update_index(fmris, IDX_INPUT_TYPE_FMRI,
                    tmp_index_dir)
                self.empty_index = False

        def setup(self):
                """Seeds the index directory with empty stubs if the directory
                is consistently empty.  Does not overwrite existing indexes."""

                absent = False
                present = False

                makedirs(self._index_dir)
                for d in self._data_dict.values():
                        file_path = os.path.join(self._index_dir,
                            d.get_file_name())
                        if os.path.exists(file_path):
                                present = True
                        else:
                                absent = True
                        if absent and present:
                                raise search_errors.InconsistentIndexException(
                                        self._index_dir)
                if present:
                        return
                if self.file_version_number:
                        raise RuntimeError("Got file_version_number other than "
                            "None in setup.")
                self.file_version_number = INITIAL_VERSION_NUMBER
                for d in self._data_dict.values():
                        d.write_dict_file(self._index_dir,
                            self.file_version_number)

        @staticmethod
        def check_for_updates(index_root, cat):
                """Check to see whether the catalog has fmris which have not
                been indexed.

                'index_root' is the path to the index to check against.

                'cat' is the catalog to check for new fmris."""

                fmri_set = set((f.remove_publisher() for f in cat.fmris()))

                data = ss.IndexStoreSet("full_fmri_list")
                try:
                        data.open(index_root)
                except IOError as e:
                        if not os.path.exists(os.path.join(
                                index_root, data.get_file_name())):
                                return fmri_set
                        else:
                                raise
                try:
                        data.read_and_discard_matching_from_argument(fmri_set)
                finally:
                        data.close_file_handle()
                return fmri_set

        def _migrate(self, source_dir=None, dest_dir=None, fast_update=False):
                """Moves the indexes from a temporary directory to the
                permanent one.

                The "source_dir" parameter is the directory containing the
                new information.

                The "dest_dir" parameter is the directory containing the
                old information.

                The "fast_update" parameter determines whether the main
                dictionary and the token byte offset files are moved.  This is
                used so that when only the update logs are touched, the large
                files don't need to be moved."""

                if not source_dir:
                        source_dir = self._tmp_dir
                if not dest_dir:
                        dest_dir = self._index_dir
                assert not (source_dir == dest_dir)

                for d in self._data_dict.values():
                        if fast_update and (d == self._data_main_dict or
                            d == self._data_token_offset or
                            d == self._data_fmri_offsets):
                                continue
                        else:
                                shutil.move(os.path.join(source_dir,
                                    d.get_file_name()),
                                    os.path.join(dest_dir, d.get_file_name()))
                if not fast_update:
                        # Remove legacy index/pkg/ directory which is obsoleted
                        # by the fmri_offsets.v1 file.
                        try:
                                shutil.rmtree(os.path.join(dest_dir, "pkg"))
                        except KeyboardInterrupt:
                                raise
                        except Exception:
                                pass

                        for at, fh in self.at_fh.items():
                                shutil.move(
                                    os.path.join(source_dir, "__at_" + at),
                                    os.path.join(dest_dir, "__at_" + at))

                        for st, fh in self.st_fh.items():
                                shutil.move(
                                    os.path.join(source_dir, "__st_" + st),
                                    os.path.join(dest_dir, "__st_" + st))
                shutil.rmtree(source_dir)

        def lock(self, blocking=False):
                """Locks the index in preparation for an index-modifying
                operation.  Raises an IndexLockedException exception on
                failure.

                'blocking' is an optional boolean value indicating whether
                to block until the lock can be obtained or to raise an
                exception immediately if it cannot be."""

                try:
                        # Attempt to obtain a file lock.
                        self.__lockfile.lock(blocking=blocking)
                except EnvironmentError as e:
                        if e.errno == errno.ENOENT:
                                # If a lock was requested, and the only
                                # reason for failure was because the
                                # index directory doesn't exist yet,
                                # then create it and try once more.
                                if not os.path.exists(self._index_dir):
                                        makedirs(self._index_dir)
                                        return self.lock(blocking=blocking)
                                raise
                        if e.errno in (errno.EACCES, errno.EROFS):
                                raise search_errors.\
                                    ProblematicPermissionsIndexException(
                                    e.filename)
                        raise

        def unlock(self):
                """Unlocks the index."""

                self.__lockfile.unlock()