429N/A#!/usr/bin/python
429N/A#
429N/A# CDDL HEADER START
429N/A#
429N/A# The contents of this file are subject to the terms of the
429N/A# Common Development and Distribution License (the "License").
429N/A# You may not use this file except in compliance with the License.
429N/A#
429N/A# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
429N/A# or http://www.opensolaris.org/os/licensing.
429N/A# See the License for the specific language governing permissions
429N/A# and limitations under the License.
429N/A#
429N/A# When distributing Covered Code, include this CDDL HEADER in each
429N/A# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
429N/A# If applicable, add the following below this CDDL HEADER, with the
429N/A# fields enclosed by brackets "[]" replaced with your own identifying
429N/A# information: Portions Copyright [yyyy] [name of copyright owner]
429N/A#
429N/A# CDDL HEADER END
429N/A#
941N/A
941N/A#
3339N/A# Copyright (c) 2010, 2016, Oracle and/or its affiliates. All rights reserved.
941N/A#
429N/A
429N/Aimport os
429N/Aimport errno
429N/Aimport time
1516N/Aimport hashlib
3234N/Afrom six.moves.urllib.parse import quote, unquote
429N/A
429N/Aimport pkg.fmri as fmri
429N/Aimport pkg.search_errors as search_errors
429N/Aimport pkg.portable as portable
3339N/Afrom pkg.misc import PKG_FILE_BUFSIZ, force_bytes
429N/A
# File names (including format-version suffixes) of the individual
# files that together make up the on-disk search index.
FAST_ADD = 'fast_add.v1'
FAST_REMOVE = 'fast_remove.v1'
MANIFEST_LIST = 'manf_list.v1'
FULL_FMRI_FILE = 'full_fmri_list'
MAIN_FILE = 'main_dict.ascii.v2'
BYTE_OFFSET_FILE = 'token_byte_offset.v1'
FULL_FMRI_HASH_FILE = 'full_fmri_list.hash'
FMRI_OFFSETS_FILE = 'fmri_offsets.v1'
546N/A
def consistent_open(data_list, directory, timeout=1):
    """Opens all data holders in data_list and ensures that the
    versions are consistent among all of them.

    It retries several times in case a race condition between file
    migration and open is encountered.  Returns the common version
    number, or None when the index is missing (no files present).
    Raises search_errors.InconsistentIndexException if consistency
    cannot be reached within 'timeout' seconds.

    Note: Do not set timeout to be 0.  It will cause an exception to
    be immediately raised.
    """

    missing = None
    cur_version = None

    start_time = time.time()

    while cur_version is None and missing is not True:
        # The assignments to cur_version and missing cannot be
        # placed here. They must be reset prior to breaking out of
        # the for loop so that the while loop condition will be
        # true. They cannot be placed after the for loop since that
        # path is taken when all files are missing or opened
        # successfully.
        if timeout is not None and \
            (time.time() - start_time) > timeout:
            raise search_errors.InconsistentIndexException(
                directory)
        for d in data_list:
            # All indexes must have the same version and all must
            # either be present or absent for a successful return.
            # If one of these conditions is not met, the function
            # tries again until it succeeds or the time spent in
            # the function is greater than timeout.
            try:
                f = os.path.join(directory, d.get_file_name())
                fh = open(f, 'r')
                # If we get here, then the current index file
                # is present.
                if missing is None:
                    missing = False
                elif missing:
                    # Previous files were missing but this one
                    # exists: inconsistent, so close everything
                    # (including the handle just opened, which has
                    # not been handed to d yet) and try again.
                    fh.close()
                    for dl in data_list:
                        dl.close_file_handle()
                    missing = None
                    cur_version = None
                    break
                d.set_file_handle(fh, f)
                version_tmp = fh.readline()
                version_num = \
                    int(version_tmp.split(' ')[1].rstrip('\n'))
                # Read the version. If this is the first file,
                # set the expected version otherwise check that
                # the version matches the expected version.
                if cur_version is None:
                    cur_version = version_num
                elif cur_version != version_num:
                    # Got inconsistent versions, so close
                    # all files and try again.
                    for dl in data_list:
                        dl.close_file_handle()
                    missing = None
                    cur_version = None
                    break
            except IOError as e:
                if e.errno == errno.ENOENT:
                    # If the index file is missing, ensure
                    # that previous files were missing as
                    # well. If not, try again.
                    if missing is False:
                        for dl in data_list:
                            dl.close_file_handle()
                        missing = None
                        cur_version = None
                        break
                    missing = True
                else:
                    for dl in data_list:
                        dl.close_file_handle()
                    raise
    if missing:
        assert cur_version is None
        # The index is missing (ie, no files were present).
        return None
    else:
        assert cur_version is not None
        return cur_version
429N/A
429N/A
class IndexStoreBase(object):
    """Base class for all data storage used by the indexer and
    queryEngine.  All members must have a file name and maintain
    an internal file handle to that file as instructed by external
    calls.
    """

    def __init__(self, file_name):
        self._name = file_name
        # Handle and path of the currently opened backing file.
        self._file_handle = None
        self._file_path = None
        # stat() details captured when the file is first opened;
        # used by should_reread to detect on-disk changes.
        self._size = None
        self._mtime = None
        self._inode = None
        self._have_read = False

    def get_file_name(self):
        return self._name

    def set_file_handle(self, f_handle, f_path):
        """Attaches an open file handle and its path; raises
        RuntimeError if a handle is already attached (it must be
        closed first).
        """
        if self._file_handle:
            raise RuntimeError("setting an extant file handle, "
                "must close first, fp is: " + f_path)
        else:
            self._file_handle = f_handle
            self._file_path = f_path
            if self._mtime is None:
                stat_info = os.stat(self._file_path)
                self._mtime = stat_info.st_mtime
                self._size = stat_info.st_size
                self._inode = stat_info.st_ino

    def get_file_path(self):
        return self._file_path

    def __copy__(self):
        return self.__class__(self._name)

    def close_file_handle(self):
        """Closes the file handle and clears it so that it cannot
        be reused.
        """

        if self._file_handle:
            self._file_handle.close()
            self._file_handle = None

    def _protected_write_dict_file(self, path, version_num, iterable):
        """Writes the dictionary in the expected format: a version
        header line followed by one entry per line.
        Note: Only child classes should call this method.
        """
        version_string = "VERSION: "
        # Use a context manager so the handle is closed even if a
        # write fails part way through.
        with open(os.path.join(path, self._name), 'w') as file_handle:
            file_handle.write(version_string + str(version_num) +
                "\n")
            for name in iterable:
                file_handle.write(str(name) + "\n")

    def should_reread(self):
        """This method uses the modification time and the file size
        to (heuristically) determine whether the file backing this
        storage has changed since it was last read.
        """
        stat_info = os.stat(self._file_path)
        if self._inode != stat_info.st_ino or \
            self._mtime != stat_info.st_mtime or \
            self._size != stat_info.st_size:
            return True
        return not self._have_read

    def read_dict_file(self):
        # Subclasses extend this; the base just records that a read
        # happened so should_reread can report accurately.
        self._have_read = True

    def open(self, directory):
        """This uses consistent open to ensure that the version line
        processing is done consistently and that only a single
        function actually opens files stored using this class.
        """
        return consistent_open([self], directory)
429N/A
429N/A
class IndexStoreMainDict(IndexStoreBase):
    """Representation of the main dictionary file of the search
    index.

    An example line looks like this:
    %25gconf.xml file!basename@basename#579,13249,13692,77391,77628

    Each line begins with a urllib-quoted search token.  The rest of
    the line is a series of space-separated lists.  Each list starts
    with an action type, separated from its sublist by '!'.  Next is
    the key type, separated from its sublist by '@'.  Next is the
    full value (the complete value which matched the token for set
    actions), separated from its sublist by '#'.  The following
    number (579 above) is the fmri id; the comma-separated numbers
    after it are byte offsets into that manifest of the lines which
    contain the token.
    """

    # Separator used at each nesting level of a main dictionary line.
    sep_chars = [" ", "!", "@", "#", ","]

    def __init__(self, file_name):
        IndexStoreBase.__init__(self, file_name)
        self._old_suffix = None

    def write_dict_file(self, path, version_num):
        """External code writes the real contents of this file; this
        empty call to protected_write_dict_file just sets the file
        up with its version header stored correctly.
        """
        IndexStoreBase._protected_write_dict_file(self, path,
            version_num, [])

    def get_file_handle(self):
        """Return the file handle.  Only sequential reads or writes
        are safe on the returned handle; in short, don't use seek.
        """
        return self._file_handle

    @staticmethod
    def parse_main_dict_line(line):
        """Parses one line of a main dictionary file into
        (token, entries).  Changes to this function must be paired
        with changes to transform_main_dict_line below.

        This produces the same data structure that
        _write_main_dict_line in indexer.py creates to write out
        each line.
        """

        seps = IndexStoreMainDict.sep_chars
        chunks = line.rstrip('\n').split(seps[0])
        token = unquote(chunks[0])
        entries = []
        for action_chunk in chunks[1:]:
            action_parts = action_chunk.split(seps[1])
            action_type = action_parts[0]
            subtype_entries = []
            for sub_chunk in action_parts[1:]:
                sub_parts = sub_chunk.split(seps[2])
                subtype = sub_parts[0]
                value_entries = []
                for val_chunk in sub_parts[1:]:
                    val_parts = val_chunk.split(seps[3])
                    full_value = unquote(val_parts[0])
                    fmri_entries = []
                    for fmri_chunk in val_parts[1:]:
                        fmri_parts = fmri_chunk.split(seps[4])
                        pfmri_index = int(fmri_parts[0])
                        offsets = [int(t) for t in fmri_parts[1:]]
                        fmri_entries.append((pfmri_index, offsets))
                    value_entries.append((full_value, fmri_entries))
                subtype_entries.append((subtype, value_entries))
            entries.append((action_type, subtype_entries))
        return token, entries

    @staticmethod
    def parse_main_dict_line_for_token(line):
        """Pulls just the leading token out of a line from a main
        dictionary file.  Changes to this function must be paired
        with changes to transform_main_dict_line below.
        """

        return unquote(line.rstrip("\n").split(" ", 1)[0])

    @staticmethod
    def transform_main_dict_line(token, entries):
        """Paired with parse_main_dict_line above.  Transforms a
        token and its data into the string which can be written to
        the main dictionary.

        The "token" parameter is the token whose index line is being
        generated.

        The "entries" parameter is a list of lists of lists and so
        on.  It contains information about where and how "token" was
        seen in manifests.  The depth of all lists at each level
        must be consistent, and must match the length of
        "sep_chars" and "quote".  The details of the contents on
        entries are described in _write_main_dict_line in indexer.py.
        """
        seps = IndexStoreMainDict.sep_chars
        pieces = [quote(str(token))]
        for action_type, subtype_entries in entries:
            pieces.append(seps[0] + action_type)
            for subtype, value_entries in subtype_entries:
                pieces.append(seps[1] + subtype)
                for full_value, fmri_entries in value_entries:
                    pieces.append(seps[2] +
                        quote(str(full_value)))
                    for pfmri_index, offsets in fmri_entries:
                        pieces.append(seps[3] +
                            str(pfmri_index))
                        for offset in offsets:
                            pieces.append(seps[4] +
                                str(offset))
        return "".join(pieces) + "\n"

    def count_entries_removed_during_partial_indexing(self):
        """Returns the number of entries removed during a second
        phase of indexing.
        """
        # Always 0 because this class stores nothing in memory.
        return 0

    def shift_file(self, use_dir, suffix):
        """Moves the existing file with self._name in directory
        use_dir to a new file named self._name + suffix in directory
        use_dir.  If it has done this previously, it removes the old
        file it moved.  It also opens the newly moved file and uses
        that as the file for its file handle.
        """
        assert self._file_handle is None
        src = os.path.join(use_dir, self._name)
        dst = os.path.join(use_dir, self._name + suffix)
        portable.rename(src, dst)
        saved_name = self._name
        self._name = self._name + suffix
        self.open(use_dir)
        self._name = saved_name
        if self._old_suffix is not None:
            os.remove(os.path.join(use_dir, self._old_suffix))
        self._old_suffix = self._name + suffix
429N/A
429N/A
class IndexStoreListDict(IndexStoreBase):
    """Used when both a list and a dictionary are needed to
    store the information.  Used for bidirectional lookup when
    one item is an int (an id) and the other is not (an entity).  It
    maintains a list of empty spots in the list so that adding
    entities can take advantage of unused space.  It encodes empty
    space as a blank line in the file format and '' in the internal
    list.
    """

    def __init__(self, file_name, build_function=lambda x: x,
        decode_function=lambda x: x):
        IndexStoreBase.__init__(self, file_name)
        self._list = []
        self._dict = {}
        self._next_id = 0
        self._list_of_empties = []
        self._decode_func = decode_function
        self._build_func = build_function
        self._line_cnt = 0

    def add_entity(self, entity, is_empty):
        """Adds an entity consistently to the list and dictionary
        allowing bidirectional lookup.  Returns the id assigned to
        the entity.
        """
        assert (len(self._list) == self._next_id)
        if self._list_of_empties and not is_empty:
            # Prefer reusing a vacated slot over growing the list.
            use_id = self._list_of_empties.pop(0)
            assert use_id <= len(self._list)
            if use_id == len(self._list):
                self._list.append(entity)
                self._next_id += 1
            else:
                self._list[use_id] = entity
        else:
            use_id = self._next_id
            self._list.append(entity)
            self._next_id += 1
        if not is_empty:
            self._dict[entity] = use_id
        assert (len(self._list) == self._next_id)
        return use_id

    def remove_id(self, in_id):
        """deletes in_id from the list and the dictionary """
        entity = self._list[in_id]
        self._list[in_id] = ""
        self._dict[entity] = ""

    def remove_entity(self, entity):
        """deletes the entity from the list and the dictionary """
        in_id = self._dict[entity]
        self._dict[entity] = ""
        self._list[in_id] = ""

    def get_id(self, entity):
        """returns the id of entity """
        return self._dict[entity]

    def get_id_and_add(self, entity):
        """Adds entity if it's not previously stored and returns the
        id for entity.
        """
        # This code purposefully reimplements add_entity
        # code. Replacing the function calls to has_entity,
        # add_entity, and get_id with direct access to the data
        # structure gave a speed up of a factor of 4. Because this
        # is a very hot path, the tradeoff seemed appropriate.

        if entity not in self._dict:
            assert (len(self._list) == self._next_id)
            if self._list_of_empties:
                use_id = self._list_of_empties.pop(0)
                assert use_id <= len(self._list)
                if use_id == len(self._list):
                    self._list.append(entity)
                    self._next_id += 1
                else:
                    self._list[use_id] = entity
            else:
                use_id = self._next_id
                self._list.append(entity)
                self._next_id += 1
            self._dict[entity] = use_id
        assert (len(self._list) == self._next_id)
        return self._dict[entity]

    def get_entity(self, in_id):
        """return the entity in_id maps to """
        return self._list[in_id]

    def has_entity(self, entity):
        """check if entity is in storage """
        return entity in self._dict

    def has_empty(self):
        """Check if the structure has any empty elements which
        can be filled with data.
        """
        return len(self._list_of_empties) > 0

    def get_next_empty(self):
        """returns the next id which maps to no element """
        return self._list_of_empties.pop()

    def write_dict_file(self, path, version_num):
        """Passes self._list to the parent class to write to a file.
        """
        IndexStoreBase._protected_write_dict_file(self, path,
            version_num, (self._decode_func(l) for l in self._list))

    def read_dict_file(self):
        """Reads in a dictionary previously stored using the above
        call.  Returns the number of lines read.
        """
        assert self._file_handle
        self._dict.clear()
        self._list = []
        # Track the line count explicitly so a file with zero
        # entries (nothing after the version line) is handled,
        # instead of referencing an unbound loop variable.
        count = 0
        for i, line in enumerate(self._file_handle):
            # A blank line means that id can be reused.
            tmp = self._build_func(line.rstrip("\n"))
            if line == "\n":
                self._list_of_empties.append(i)
            else:
                self._dict[tmp] = i
            self._list.append(tmp)
            count = i + 1
        self._line_cnt = count
        self._next_id = count
        IndexStoreBase.read_dict_file(self)
        return self._line_cnt

    def count_entries_removed_during_partial_indexing(self):
        """Returns the number of entries removed during a second
        phase of indexing.
        """
        return len(self._list)
429N/A
class IndexStoreDict(IndexStoreBase):
    """Class used when only id -> entity lookup is needed.  Ids are
    the line numbers of the backing file.
    """

    def __init__(self, file_name):
        IndexStoreBase.__init__(self, file_name)
        self._dict = {}
        self._next_id = 0

    def get_dict(self):
        return self._dict

    def get_entity(self, in_id):
        return self._dict[in_id]

    def has_entity(self, entity):
        return entity in self._dict

    def read_dict_file(self):
        """Populates the mapping from a file laid out one entity per
        line; the line number is the id.
        """
        self._dict.clear()
        for idx, raw in enumerate(self._file_handle):
            self._dict[idx] = raw.rstrip("\n")
        IndexStoreBase.read_dict_file(self)

    def count_entries_removed_during_partial_indexing(self):
        """Returns the number of entries removed during a second
        phase of indexing.
        """
        return len(self._dict)
429N/A
class IndexStoreDictMutable(IndexStoreBase):
    """Dictionary which allows dynamic update of its storage.
    """

    def __init__(self, file_name):
        IndexStoreBase.__init__(self, file_name)
        self._dict = {}

    def get_dict(self):
        return self._dict

    def has_entity(self, entity):
        return entity in self._dict

    def get_id(self, entity):
        return self._dict[entity]

    def get_keys(self):
        return list(self._dict.keys())

    @staticmethod
    def __quote(token):
        """Encode a token for on-disk storage: tokens containing a
        space are url-quoted and prefixed with '1'; everything else
        is stored verbatim with a '0' prefix.
        """
        # Parameter renamed from 'str' so the builtin is not
        # shadowed.
        if " " in token:
            return "1" + quote(token)
        else:
            return "0" + token

    def read_dict_file(self):
        """Reads in a dictionary stored with an entity and its
        offset on each line.
        """
        self._dict.clear()
        for line in self._file_handle:
            token, offset = line.split(" ")
            # The leading character records whether the token was
            # url-quoted by __quote above.
            if token[0] == "1":
                token = unquote(token[1:])
            else:
                token = token[1:]
            offset = int(offset)
            self._dict[token] = offset
        IndexStoreBase.read_dict_file(self)

    def open_out_file(self, use_dir, version_num):
        """Opens the output file for this class and prepares it
        to be written via write_entity.
        """
        # Write the version header first, then append entries.
        self.write_dict_file(use_dir, version_num)
        self._file_handle = open(os.path.join(use_dir, self._name),
            'a', buffering=PKG_FILE_BUFSIZ)

    def write_entity(self, entity, my_id):
        """Writes the entity out to the file with my_id """
        assert self._file_handle is not None
        self._file_handle.write(self.__quote(str(entity)) + " " +
            str(my_id) + "\n")

    def write_dict_file(self, path, version_num):
        """Writes only the version header; entries are appended
        later via write_entity.
        """
        IndexStoreBase._protected_write_dict_file(self, path,
            version_num, [])

    def count_entries_removed_during_partial_indexing(self):
        """Returns the number of entries removed during a second
        phase of indexing.
        """
        return 0
429N/A
class IndexStoreSetHash(IndexStoreBase):
    """Stores a single hash value summarizing a set of items."""

    def __init__(self, file_name):
        IndexStoreBase.__init__(self, file_name)
        # In order to interoperate with older clients, we must use
        # sha-1 here.
        self.hash_val = hashlib.sha1().hexdigest()

    def set_hash(self, vals):
        """Set the hash value from vals."""
        self.hash_val = self.calc_hash(vals)

    def calc_hash(self, vals):
        """Calculate the hash value of the sorted members of vals."""
        # In order to interoperate with older clients, we must use
        # sha-1 here.
        digest = hashlib.sha1()
        for item in sorted(vals):
            # Unicode-objects must be encoded before hashing.
            digest.update(force_bytes(item))
        return digest.hexdigest()

    def write_dict_file(self, path, version_num):
        """Write self.hash_val out to a line in a file """
        IndexStoreBase._protected_write_dict_file(self, path,
            version_num, [self.hash_val])

    def read_dict_file(self):
        """Process a dictionary file written using the above method.
        """
        # Remember the position so the handle can be rewound after
        # the (single) hash line is read.
        start = self._file_handle.tell()
        line_no = 0
        for line_no, line in enumerate(self._file_handle):
            # The file must hold exactly one hash line.
            assert line_no < 1
            self.hash_val = line.rstrip()
        self._file_handle.seek(start)
        IndexStoreBase.read_dict_file(self)
        return line_no

    def check_against_file(self, vals):
        """Check the hash value of vals against the value stored
        in the file for this object."""
        if not self._have_read:
            self.read_dict_file()
        incoming_hash = self.calc_hash(vals)
        if self.hash_val != incoming_hash:
            raise search_errors.IncorrectIndexFileHash(
                self.hash_val, incoming_hash)

    def count_entries_removed_during_partial_indexing(self):
        """Returns the number of entries removed during a second
        phase of indexing."""
        return 0
3234N/A
class IndexStoreSet(IndexStoreBase):
    """Used when only set membership is desired.
    This is currently designed for exclusive use
    with storage of fmri.PkgFmris.  However, that impact is only
    seen in the read_and_discard_matching_from_argument method.
    """

    def __init__(self, file_name):
        IndexStoreBase.__init__(self, file_name)
        self._set = set()

    def get_set(self):
        return self._set

    def clear(self):
        self._set.clear()

    def add_entity(self, entity):
        self._set.add(entity)

    def remove_entity(self, entity):
        """Remove entity purposefully assumes that entity is
        already in the set to be removed.  This is useful for
        error checking and debugging.
        """
        self._set.remove(entity)

    def has_entity(self, entity):
        return entity in self._set

    def write_dict_file(self, path, version_num):
        """Write each member of the set out to a line in a file """
        IndexStoreBase._protected_write_dict_file(self, path,
            version_num, self._set)

    def read_dict_file(self):
        """Process a dictionary file written using the above method.
        Returns the number of entities read.
        """
        assert self._file_handle
        self._set.clear()
        total = 0
        for idx, raw in enumerate(self._file_handle):
            entity = raw.rstrip("\n")
            assert idx == len(self._set)
            self.add_entity(entity)
            total = idx + 1
        IndexStoreBase.read_dict_file(self)
        return total

    def read_and_discard_matching_from_argument(self, fmri_set):
        """Reads the file and removes all fmris in the file
        from fmri_set.
        """
        if self._file_handle:
            for raw in self._file_handle:
                fmri_set.discard(fmri.PkgFmri(raw))

    def count_entries_removed_during_partial_indexing(self):
        """Returns the number of entries removed during a second
        phase of indexing."""
        return len(self._set)
1054N/A
1054N/A
class InvertedDict(IndexStoreBase):
    """Class used to store and process fmri to offset mappings.  It
    does delta compression and deduplication of shared offset sets
    when writing to a file."""

    def __init__(self, file_name, p_id_trans):
        """file_name is the name of the file to write to or read
        from.

        p_id_trans is an object with a get_entity method which, when
        given a package id number, returns the PkgFmri object for
        that id number."""

        IndexStoreBase.__init__(self, file_name)
        self._p_id_trans = p_id_trans
        self._dict = {}
        self._fmri_offsets = {}

    def __copy__(self):
        return self.__class__(self._name, self._p_id_trans)

    def add_pair(self, p_id, offset):
        """Adds a package id number and an associated offset to the
        existing dictionary."""

        self._fmri_offsets.setdefault(p_id, []).append(offset)

    def invert_id_to_offsets_dict(self):
        """Does delta encoding of offsets to reduce space by only
        storing the difference between the current offset and the
        previous offset.  It also performs deduplication so that all
        packages with the same set of offsets share a common
        bucket."""

        inv = {}
        for p_id in list(self._fmri_offsets.keys()):
            prev = 0
            deltas = []
            for off in sorted(set(self._fmri_offsets[p_id])):
                deltas.append(off - prev)
                prev = off
            key = " ".join(str(d) for d in deltas)
            del self._fmri_offsets[p_id]
            inv.setdefault(key, []).append(p_id)
        return inv

    @staticmethod
    def __make_line(offset_str, p_ids, trans):
        """For a given offset string, a list of package id numbers,
        and a translator from package id numbers to PkgFmris,
        returns the string which represents that information.  Its
        format is space-separated package fmris, followed by a '!',
        followed by space-separated offsets which have had delta
        compression performed."""

        fmri_strs = [
            trans.get_entity(p_id).get_fmri(anarchy=True,
                include_scheme=False)
            for p_id in p_ids
        ]
        return " ".join(fmri_strs) + "!" + offset_str

    def write_dict_file(self, path, version_num):
        """Write the mapping of package fmris to offset sets out
        to the file."""

        inv = self.invert_id_to_offsets_dict()
        IndexStoreBase._protected_write_dict_file(self, path,
            version_num, (
                self.__make_line(o, inv[o], self._p_id_trans)
                for o in inv
            ))

    def read_dict_file(self):
        """Read a file written by the above function and store the
        information in a dictionary."""

        assert self._file_handle
        for raw in self._file_handle:
            fmris, offs = raw.split("!")
            self._dict[fmris] = offs
        IndexStoreBase.read_dict_file(self)

    @staticmethod
    def de_delta(offs):
        """For a list of strings of offsets, undo the delta
        compression that has been performed."""

        running = 0
        result = []
        for o in offs:
            running += int(o)
            result.append(running)
        return result

    def get_offsets(self, match_func):
        """For a given function which returns true if it matches the
        desired fmri, return the offsets which are associated with
        the fmris which match."""

        matched = []
        for fmris in self._dict.keys():
            if any(match_func(p) for p in fmris.split()):
                matched.extend(self.de_delta(
                    self._dict[fmris].split()))
        return set(matched)