429N/A#!/usr/bin/python
429N/A#
429N/A# CDDL HEADER START
429N/A#
429N/A# The contents of this file are subject to the terms of the
429N/A# Common Development and Distribution License (the "License").
429N/A# You may not use this file except in compliance with the License.
429N/A#
429N/A# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
429N/A# or http://www.opensolaris.org/os/licensing.
429N/A# See the License for the specific language governing permissions
429N/A# and limitations under the License.
429N/A#
429N/A# When distributing Covered Code, include this CDDL HEADER in each
429N/A# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
429N/A# If applicable, add the following below this CDDL HEADER, with the
429N/A# fields enclosed by brackets "[]" replaced with your own identifying
429N/A# information: Portions Copyright [yyyy] [name of copyright owner]
429N/A#
429N/A# CDDL HEADER END
429N/A#
941N/A
941N/A#
3339N/A# Copyright (c) 2010, 2016, Oracle and/or its affiliates. All rights reserved.
941N/A#
429N/A
429N/Aimport os
429N/Aimport errno
429N/Aimport time
1516N/Aimport hashlib
3234N/Afrom six.moves.urllib.parse import quote, unquote
429N/A
429N/Aimport pkg.fmri as fmri
429N/Aimport pkg.search_errors as search_errors
429N/Aimport pkg.portable as portable
3339N/Afrom pkg.misc import PKG_FILE_BUFSIZ, force_bytes
429N/A
# File names (including format-version suffixes) of the individual
# files that together make up the on-disk search index.
FAST_ADD = 'fast_add.v1'
FAST_REMOVE = 'fast_remove.v1'
MANIFEST_LIST = 'manf_list.v1'
FULL_FMRI_FILE = 'full_fmri_list'
MAIN_FILE = 'main_dict.ascii.v2'
BYTE_OFFSET_FILE = 'token_byte_offset.v1'
FULL_FMRI_HASH_FILE = 'full_fmri_list.hash'
FMRI_OFFSETS_FILE = 'fmri_offsets.v1'
546N/A
def consistent_open(data_list, directory, timeout=1):
    """Opens all data holders in data_list and ensures that the
    versions are consistent among all of them.

    It retries several times in case a race condition between file
    migration and open is encountered.  Returns the common version
    number, or None when the index is missing (no files present).
    Raises search_errors.InconsistentIndexException if consistency
    cannot be reached within 'timeout' seconds.

    Note: Do not set timeout to be 0.  It will cause an exception to
    be immediately raised.
    """

    missing = None
    cur_version = None

    start_time = time.time()

    while cur_version is None and missing is not True:
        # The assignments to cur_version and missing cannot be
        # placed here. They must be reset prior to breaking out of
        # the for loop so that the while loop condition will be
        # true. They cannot be placed after the for loop since that
        # path is taken when all files are missing or opened
        # successfully.
        if timeout is not None and \
            (time.time() - start_time) > timeout:
            raise search_errors.InconsistentIndexException(
                directory)
        for d in data_list:
            # All indexes must have the same version and all must
            # either be present or absent for a successful return.
            # If one of these conditions is not met, the function
            # tries again until it succeeds or the time spent in
            # the function is greater than timeout.
            try:
                f = os.path.join(directory, d.get_file_name())
                fh = open(f, 'r')
                # If we get here, then the current index file
                # is present.
                if missing is None:
                    missing = False
                elif missing:
                    # Previous files were missing but this one
                    # exists: inconsistent, so close everything
                    # (including the handle just opened, which has
                    # not been handed to d yet) and try again.
                    fh.close()
                    for dl in data_list:
                        dl.close_file_handle()
                    missing = None
                    cur_version = None
                    break
                d.set_file_handle(fh, f)
                version_tmp = fh.readline()
                version_num = \
                    int(version_tmp.split(' ')[1].rstrip('\n'))
                # Read the version. If this is the first file,
                # set the expected version otherwise check that
                # the version matches the expected version.
                if cur_version is None:
                    cur_version = version_num
                elif cur_version != version_num:
                    # Got inconsistent versions, so close
                    # all files and try again.
                    for dl in data_list:
                        dl.close_file_handle()
                    missing = None
                    cur_version = None
                    break
            except IOError as e:
                if e.errno == errno.ENOENT:
                    # If the index file is missing, ensure
                    # that previous files were missing as
                    # well. If not, try again.
                    if missing is False:
                        for dl in data_list:
                            dl.close_file_handle()
                        missing = None
                        cur_version = None
                        break
                    missing = True
                else:
                    for dl in data_list:
                        dl.close_file_handle()
                    raise
    if missing:
        assert cur_version is None
        # The index is missing (ie, no files were present).
        return None
    else:
        assert cur_version is not None
        return cur_version
429N/A
429N/A
class IndexStoreBase(object):
    """Base class for all data storage used by the indexer and
    queryEngine.  All members must have a file name and maintain
    an internal file handle to that file as instructed by external
    calls.
    """

    def __init__(self, file_name):
        self._name = file_name
        # Handle and path of the currently opened backing file.
        self._file_handle = None
        self._file_path = None
        # stat() details captured when the file is first opened;
        # used by should_reread to detect on-disk changes.
        self._size = None
        self._mtime = None
        self._inode = None
        self._have_read = False

    def get_file_name(self):
        return self._name

    def set_file_handle(self, f_handle, f_path):
        """Attaches an open file handle and its path; raises
        RuntimeError if a handle is already attached (it must be
        closed first).
        """
        if self._file_handle:
            raise RuntimeError("setting an extant file handle, "
                "must close first, fp is: " + f_path)
        else:
            self._file_handle = f_handle
            self._file_path = f_path
            if self._mtime is None:
                stat_info = os.stat(self._file_path)
                self._mtime = stat_info.st_mtime
                self._size = stat_info.st_size
                self._inode = stat_info.st_ino

    def get_file_path(self):
        return self._file_path

    def __copy__(self):
        return self.__class__(self._name)

    def close_file_handle(self):
        """Closes the file handle and clears it so that it cannot
        be reused.
        """

        if self._file_handle:
            self._file_handle.close()
            self._file_handle = None

    def _protected_write_dict_file(self, path, version_num, iterable):
        """Writes the dictionary in the expected format: a version
        header line followed by one entry per line.
        Note: Only child classes should call this method.
        """
        version_string = "VERSION: "
        # Use a context manager so the handle is closed even if a
        # write fails part way through.
        with open(os.path.join(path, self._name), 'w') as file_handle:
            file_handle.write(version_string + str(version_num) +
                "\n")
            for name in iterable:
                file_handle.write(str(name) + "\n")

    def should_reread(self):
        """This method uses the modification time and the file size
        to (heuristically) determine whether the file backing this
        storage has changed since it was last read.
        """
        stat_info = os.stat(self._file_path)
        if self._inode != stat_info.st_ino or \
            self._mtime != stat_info.st_mtime or \
            self._size != stat_info.st_size:
            return True
        return not self._have_read

    def read_dict_file(self):
        # Subclasses extend this; the base just records that a read
        # happened so should_reread can report accurately.
        self._have_read = True

    def open(self, directory):
        """This uses consistent open to ensure that the version line
        processing is done consistently and that only a single
        function actually opens files stored using this class.
        """
        return consistent_open([self], directory)
429N/A
429N/A
class IndexStoreMainDict(IndexStoreBase):
    """Representation of the main dictionary file of the search
    index.

    An example line looks like this:
    %25gconf.xml file!basename@basename#579,13249,13692,77391,77628

    Each line begins with a urllib-quoted search token.  The rest of
    the line is a series of space-separated lists.  Each list starts
    with an action type, separated from its sublist by '!'.  Next is
    the key type, separated from its sublist by '@'.  Next is the
    full value (the complete value which matched the token for set
    actions), separated from its sublist by '#'.  The following
    number (579 above) is the fmri id; the comma-separated numbers
    after it are byte offsets into that manifest of the lines which
    contain the token.
    """

    # Separator used at each nesting level of a main dictionary line.
    sep_chars = [" ", "!", "@", "#", ","]

    def __init__(self, file_name):
        IndexStoreBase.__init__(self, file_name)
        self._old_suffix = None

    def write_dict_file(self, path, version_num):
        """External code writes the real contents of this file; this
        empty call to protected_write_dict_file just sets the file
        up with its version header stored correctly.
        """
        IndexStoreBase._protected_write_dict_file(self, path,
            version_num, [])

    def get_file_handle(self):
        """Return the file handle.  Only sequential reads or writes
        are safe on the returned handle; in short, don't use seek.
        """
        return self._file_handle

    @staticmethod
    def parse_main_dict_line(line):
        """Parses one line of a main dictionary file into
        (token, entries).  Changes to this function must be paired
        with changes to transform_main_dict_line below.

        This produces the same data structure that
        _write_main_dict_line in indexer.py creates to write out
        each line.
        """

        seps = IndexStoreMainDict.sep_chars
        chunks = line.rstrip('\n').split(seps[0])
        token = unquote(chunks[0])
        entries = []
        for action_chunk in chunks[1:]:
            action_parts = action_chunk.split(seps[1])
            action_type = action_parts[0]
            subtype_entries = []
            for sub_chunk in action_parts[1:]:
                sub_parts = sub_chunk.split(seps[2])
                subtype = sub_parts[0]
                value_entries = []
                for val_chunk in sub_parts[1:]:
                    val_parts = val_chunk.split(seps[3])
                    full_value = unquote(val_parts[0])
                    fmri_entries = []
                    for fmri_chunk in val_parts[1:]:
                        fmri_parts = fmri_chunk.split(seps[4])
                        pfmri_index = int(fmri_parts[0])
                        offsets = [int(t) for t in fmri_parts[1:]]
                        fmri_entries.append((pfmri_index, offsets))
                    value_entries.append((full_value, fmri_entries))
                subtype_entries.append((subtype, value_entries))
            entries.append((action_type, subtype_entries))
        return token, entries

    @staticmethod
    def parse_main_dict_line_for_token(line):
        """Pulls just the leading token out of a line from a main
        dictionary file.  Changes to this function must be paired
        with changes to transform_main_dict_line below.
        """

        return unquote(line.rstrip("\n").split(" ", 1)[0])

    @staticmethod
    def transform_main_dict_line(token, entries):
        """Paired with parse_main_dict_line above.  Transforms a
        token and its data into the string which can be written to
        the main dictionary.

        The "token" parameter is the token whose index line is being
        generated.

        The "entries" parameter is a list of lists of lists and so
        on.  It contains information about where and how "token" was
        seen in manifests.  The depth of all lists at each level
        must be consistent, and must match the length of
        "sep_chars" and "quote".  The details of the contents on
        entries are described in _write_main_dict_line in indexer.py.
        """
        seps = IndexStoreMainDict.sep_chars
        pieces = [quote(str(token))]
        for action_type, subtype_entries in entries:
            pieces.append(seps[0] + action_type)
            for subtype, value_entries in subtype_entries:
                pieces.append(seps[1] + subtype)
                for full_value, fmri_entries in value_entries:
                    pieces.append(seps[2] +
                        quote(str(full_value)))
                    for pfmri_index, offsets in fmri_entries:
                        pieces.append(seps[3] +
                            str(pfmri_index))
                        for offset in offsets:
                            pieces.append(seps[4] +
                                str(offset))
        return "".join(pieces) + "\n"

    def count_entries_removed_during_partial_indexing(self):
        """Returns the number of entries removed during a second
        phase of indexing.
        """
        # Always 0 because this class stores nothing in memory.
        return 0

    def shift_file(self, use_dir, suffix):
        """Moves the existing file with self._name in directory
        use_dir to a new file named self._name + suffix in directory
        use_dir.  If it has done this previously, it removes the old
        file it moved.  It also opens the newly moved file and uses
        that as the file for its file handle.
        """
        assert self._file_handle is None
        src = os.path.join(use_dir, self._name)
        dst = os.path.join(use_dir, self._name + suffix)
        portable.rename(src, dst)
        saved_name = self._name
        self._name = self._name + suffix
        self.open(use_dir)
        self._name = saved_name
        if self._old_suffix is not None:
            os.remove(os.path.join(use_dir, self._old_suffix))
        self._old_suffix = self._name + suffix
429N/A
429N/A
class IndexStoreListDict(IndexStoreBase):
    """Used when both a list and a dictionary are needed to
    store the information.  Used for bidirectional lookup when
    one item is an int (an id) and the other is not (an entity).  It
    maintains a list of empty spots in the list so that adding
    entities can take advantage of unused space.  It encodes empty
    space as a blank line in the file format and '' in the internal
    list.
    """

    def __init__(self, file_name, build_function=lambda x: x,
        decode_function=lambda x: x):
        IndexStoreBase.__init__(self, file_name)
        self._list = []
        self._dict = {}
        self._next_id = 0
        self._list_of_empties = []
        self._decode_func = decode_function
        self._build_func = build_function
        self._line_cnt = 0

    def add_entity(self, entity, is_empty):
        """Adds an entity consistently to the list and dictionary
        allowing bidirectional lookup.  Returns the id assigned to
        the entity.
        """
        assert (len(self._list) == self._next_id)
        if self._list_of_empties and not is_empty:
            # Prefer reusing a vacated slot over growing the list.
            use_id = self._list_of_empties.pop(0)
            assert use_id <= len(self._list)
            if use_id == len(self._list):
                self._list.append(entity)
                self._next_id += 1
            else:
                self._list[use_id] = entity
        else:
            use_id = self._next_id
            self._list.append(entity)
            self._next_id += 1
        if not is_empty:
            self._dict[entity] = use_id
        assert (len(self._list) == self._next_id)
        return use_id

    def remove_id(self, in_id):
        """deletes in_id from the list and the dictionary """
        entity = self._list[in_id]
        self._list[in_id] = ""
        self._dict[entity] = ""

    def remove_entity(self, entity):
        """deletes the entity from the list and the dictionary """
        in_id = self._dict[entity]
        self._dict[entity] = ""
        self._list[in_id] = ""

    def get_id(self, entity):
        """returns the id of entity """
        return self._dict[entity]

    def get_id_and_add(self, entity):
        """Adds entity if it's not previously stored and returns the
        id for entity.
        """
        # This code purposefully reimplements add_entity
        # code. Replacing the function calls to has_entity,
        # add_entity, and get_id with direct access to the data
        # structure gave a speed up of a factor of 4. Because this
        # is a very hot path, the tradeoff seemed appropriate.

        if entity not in self._dict:
            assert (len(self._list) == self._next_id)
            if self._list_of_empties:
                use_id = self._list_of_empties.pop(0)
                assert use_id <= len(self._list)
                if use_id == len(self._list):
                    self._list.append(entity)
                    self._next_id += 1
                else:
                    self._list[use_id] = entity
            else:
                use_id = self._next_id
                self._list.append(entity)
                self._next_id += 1
            self._dict[entity] = use_id
        assert (len(self._list) == self._next_id)
        return self._dict[entity]

    def get_entity(self, in_id):
        """return the entity in_id maps to """
        return self._list[in_id]

    def has_entity(self, entity):
        """check if entity is in storage """
        return entity in self._dict

    def has_empty(self):
        """Check if the structure has any empty elements which
        can be filled with data.
        """
        return len(self._list_of_empties) > 0

    def get_next_empty(self):
        """returns the next id which maps to no element """
        return self._list_of_empties.pop()

    def write_dict_file(self, path, version_num):
        """Passes self._list to the parent class to write to a file.
        """
        IndexStoreBase._protected_write_dict_file(self, path,
            version_num, (self._decode_func(l) for l in self._list))

    def read_dict_file(self):
        """Reads in a dictionary previously stored using the above
        call.  Returns the number of lines read.
        """
        assert self._file_handle
        self._dict.clear()
        self._list = []
        # Track the line count explicitly so a file with zero
        # entries (nothing after the version line) is handled,
        # instead of referencing an unbound loop variable.
        count = 0
        for i, line in enumerate(self._file_handle):
            # A blank line means that id can be reused.
            tmp = self._build_func(line.rstrip("\n"))
            if line == "\n":
                self._list_of_empties.append(i)
            else:
                self._dict[tmp] = i
            self._list.append(tmp)
            count = i + 1
        self._line_cnt = count
        self._next_id = count
        IndexStoreBase.read_dict_file(self)
        return self._line_cnt

    def count_entries_removed_during_partial_indexing(self):
        """Returns the number of entries removed during a second
        phase of indexing.
        """
        return len(self._list)
429N/A
class IndexStoreDict(IndexStoreBase):
    """Class used when only id -> entity lookup is needed.  Ids are
    the line numbers of the backing file.
    """

    def __init__(self, file_name):
        IndexStoreBase.__init__(self, file_name)
        self._dict = {}
        self._next_id = 0

    def get_dict(self):
        return self._dict

    def get_entity(self, in_id):
        return self._dict[in_id]

    def has_entity(self, entity):
        return entity in self._dict

    def read_dict_file(self):
        """Populates the mapping from a file laid out one entity per
        line; the line number is the id.
        """
        self._dict.clear()
        for idx, raw in enumerate(self._file_handle):
            self._dict[idx] = raw.rstrip("\n")
        IndexStoreBase.read_dict_file(self)

    def count_entries_removed_during_partial_indexing(self):
        """Returns the number of entries removed during a second
        phase of indexing.
        """
        return len(self._dict)
429N/A
class IndexStoreDictMutable(IndexStoreBase):
    """Dictionary which allows dynamic update of its storage.
    """

    def __init__(self, file_name):
        IndexStoreBase.__init__(self, file_name)
        self._dict = {}

    def get_dict(self):
        return self._dict

    def has_entity(self, entity):
        return entity in self._dict

    def get_id(self, entity):
        return self._dict[entity]

    def get_keys(self):
        return list(self._dict.keys())

    @staticmethod
    def __quote(token):
        """Encode a token for on-disk storage: tokens containing a
        space are url-quoted and prefixed with '1'; everything else
        is stored verbatim with a '0' prefix.
        """
        # Parameter renamed from 'str' so the builtin is not
        # shadowed.
        if " " in token:
            return "1" + quote(token)
        else:
            return "0" + token

    def read_dict_file(self):
        """Reads in a dictionary stored with an entity and its
        offset on each line.
        """
        self._dict.clear()
        for line in self._file_handle:
            token, offset = line.split(" ")
            # The leading character records whether the token was
            # url-quoted by __quote above.
            if token[0] == "1":
                token = unquote(token[1:])
            else:
                token = token[1:]
            offset = int(offset)
            self._dict[token] = offset
        IndexStoreBase.read_dict_file(self)

    def open_out_file(self, use_dir, version_num):
        """Opens the output file for this class and prepares it
        to be written via write_entity.
        """
        # Write the version header first, then append entries.
        self.write_dict_file(use_dir, version_num)
        self._file_handle = open(os.path.join(use_dir, self._name),
            'a', buffering=PKG_FILE_BUFSIZ)

    def write_entity(self, entity, my_id):
        """Writes the entity out to the file with my_id """
        assert self._file_handle is not None
        self._file_handle.write(self.__quote(str(entity)) + " " +
            str(my_id) + "\n")

    def write_dict_file(self, path, version_num):
        """Writes only the version header; entries are appended
        later via write_entity.
        """
        IndexStoreBase._protected_write_dict_file(self, path,
            version_num, [])

    def count_entries_removed_during_partial_indexing(self):
        """Returns the number of entries removed during a second
        phase of indexing.
        """
        return 0
429N/A
class IndexStoreSetHash(IndexStoreBase):
    """Stores a single hash value summarizing a set of items."""

    def __init__(self, file_name):
        IndexStoreBase.__init__(self, file_name)
        # In order to interoperate with older clients, we must use
        # sha-1 here.
        self.hash_val = hashlib.sha1().hexdigest()

    def set_hash(self, vals):
        """Set the hash value from vals."""
        self.hash_val = self.calc_hash(vals)

    def calc_hash(self, vals):
        """Calculate the hash value of the sorted members of vals."""
        # In order to interoperate with older clients, we must use
        # sha-1 here.
        digest = hashlib.sha1()
        for item in sorted(vals):
            # Unicode-objects must be encoded before hashing.
            digest.update(force_bytes(item))
        return digest.hexdigest()

    def write_dict_file(self, path, version_num):
        """Write self.hash_val out to a line in a file """
        IndexStoreBase._protected_write_dict_file(self, path,
            version_num, [self.hash_val])

    def read_dict_file(self):
        """Process a dictionary file written using the above method.
        """
        # Remember the position so the handle can be rewound after
        # the (single) hash line is read.
        start = self._file_handle.tell()
        line_no = 0
        for line_no, line in enumerate(self._file_handle):
            # The file must hold exactly one hash line.
            assert line_no < 1
            self.hash_val = line.rstrip()
        self._file_handle.seek(start)
        IndexStoreBase.read_dict_file(self)
        return line_no

    def check_against_file(self, vals):
        """Check the hash value of vals against the value stored
        in the file for this object."""
        if not self._have_read:
            self.read_dict_file()
        incoming_hash = self.calc_hash(vals)
        if self.hash_val != incoming_hash:
            raise search_errors.IncorrectIndexFileHash(
                self.hash_val, incoming_hash)

    def count_entries_removed_during_partial_indexing(self):
        """Returns the number of entries removed during a second
        phase of indexing."""
        return 0
3234N/A
class IndexStoreSet(IndexStoreBase):
    """Used when only set membership is desired.
    This is currently designed for exclusive use
    with storage of fmri.PkgFmris.  However, that impact is only
    seen in the read_and_discard_matching_from_argument method.
    """

    def __init__(self, file_name):
        IndexStoreBase.__init__(self, file_name)
        self._set = set()

    def get_set(self):
        return self._set

    def clear(self):
        self._set.clear()

    def add_entity(self, entity):
        self._set.add(entity)

    def remove_entity(self, entity):
        """Remove entity purposefully assumes that entity is
        already in the set to be removed.  This is useful for
        error checking and debugging.
        """
        self._set.remove(entity)

    def has_entity(self, entity):
        return entity in self._set

    def write_dict_file(self, path, version_num):
        """Write each member of the set out to a line in a file """
        IndexStoreBase._protected_write_dict_file(self, path,
            version_num, self._set)

    def read_dict_file(self):
        """Process a dictionary file written using the above method.
        Returns the number of entities read.
        """
        assert self._file_handle
        self._set.clear()
        total = 0
        for idx, raw in enumerate(self._file_handle):
            entity = raw.rstrip("\n")
            assert idx == len(self._set)
            self.add_entity(entity)
            total = idx + 1
        IndexStoreBase.read_dict_file(self)
        return total

    def read_and_discard_matching_from_argument(self, fmri_set):
        """Reads the file and removes all fmris in the file
        from fmri_set.
        """
        if self._file_handle:
            for raw in self._file_handle:
                fmri_set.discard(fmri.PkgFmri(raw))

    def count_entries_removed_during_partial_indexing(self):
        """Returns the number of entries removed during a second
        phase of indexing."""
        return len(self._set)
1054N/A
1054N/A
class InvertedDict(IndexStoreBase):
    """Class used to store and process fmri to offset mappings.  It
    does delta compression and deduplication of shared offset sets
    when writing to a file."""

    def __init__(self, file_name, p_id_trans):
        """file_name is the name of the file to write to or read
        from.

        p_id_trans is an object with a get_entity method which, when
        given a package id number, returns the PkgFmri object for
        that id number."""

        IndexStoreBase.__init__(self, file_name)
        self._p_id_trans = p_id_trans
        self._dict = {}
        self._fmri_offsets = {}

    def __copy__(self):
        return self.__class__(self._name, self._p_id_trans)

    def add_pair(self, p_id, offset):
        """Adds a package id number and an associated offset to the
        existing dictionary."""

        self._fmri_offsets.setdefault(p_id, []).append(offset)

    def invert_id_to_offsets_dict(self):
        """Does delta encoding of offsets to reduce space by only
        storing the difference between the current offset and the
        previous offset.  It also performs deduplication so that all
        packages with the same set of offsets share a common
        bucket."""

        inv = {}
        for p_id in list(self._fmri_offsets.keys()):
            prev = 0
            deltas = []
            for off in sorted(set(self._fmri_offsets[p_id])):
                deltas.append(off - prev)
                prev = off
            key = " ".join(str(d) for d in deltas)
            del self._fmri_offsets[p_id]
            inv.setdefault(key, []).append(p_id)
        return inv

    @staticmethod
    def __make_line(offset_str, p_ids, trans):
        """For a given offset string, a list of package id numbers,
        and a translator from package id numbers to PkgFmris,
        returns the string which represents that information.  Its
        format is space-separated package fmris, followed by a '!',
        followed by space-separated offsets which have had delta
        compression performed."""

        fmri_strs = [
            trans.get_entity(p_id).get_fmri(anarchy=True,
                include_scheme=False)
            for p_id in p_ids
        ]
        return " ".join(fmri_strs) + "!" + offset_str

    def write_dict_file(self, path, version_num):
        """Write the mapping of package fmris to offset sets out
        to the file."""

        inv = self.invert_id_to_offsets_dict()
        IndexStoreBase._protected_write_dict_file(self, path,
            version_num, (
                self.__make_line(o, inv[o], self._p_id_trans)
                for o in inv
            ))

    def read_dict_file(self):
        """Read a file written by the above function and store the
        information in a dictionary."""

        assert self._file_handle
        for raw in self._file_handle:
            fmris, offs = raw.split("!")
            self._dict[fmris] = offs
        IndexStoreBase.read_dict_file(self)

    @staticmethod
    def de_delta(offs):
        """For a list of strings of offsets, undo the delta
        compression that has been performed."""

        running = 0
        result = []
        for o in offs:
            running += int(o)
            result.append(running)
        return result

    def get_offsets(self, match_func):
        """For a given function which returns true if it matches the
        desired fmri, return the offsets which are associated with
        the fmris which match."""

        matched = []
        for fmris in self._dict.keys():
            if any(match_func(p) for p in fmris.split()):
                matched.extend(self.de_delta(
                    self._dict[fmris].split()))
        return set(matched)