search_storage.py revision 941
#!/usr/bin/python
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
945631faab2bf1aed8d95a1fd0c317a9ce153725Timo Sirainen
#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
f81a4d2002da0db33d11ca694d3a91b3ee2a0fdbTimo Sirainen
f81a4d2002da0db33d11ca694d3a91b3ee2a0fdbTimo Sirainenimport os
f81a4d2002da0db33d11ca694d3a91b3ee2a0fdbTimo Sirainenimport errno
f81a4d2002da0db33d11ca694d3a91b3ee2a0fdbTimo Sirainenimport time
f81a4d2002da0db33d11ca694d3a91b3ee2a0fdbTimo Sirainenimport sha
f81a4d2002da0db33d11ca694d3a91b3ee2a0fdbTimo Sirainenimport urllib
f81a4d2002da0db33d11ca694d3a91b3ee2a0fdbTimo Sirainen
f81a4d2002da0db33d11ca694d3a91b3ee2a0fdbTimo Sirainenimport pkg.fmri as fmri
f81a4d2002da0db33d11ca694d3a91b3ee2a0fdbTimo Sirainenimport pkg.search_errors as search_errors
f81a4d2002da0db33d11ca694d3a91b3ee2a0fdbTimo Sirainenimport pkg.portable as portable
f81a4d2002da0db33d11ca694d3a91b3ee2a0fdbTimo Sirainen
# File names of the individual files that make up an on-disk search index.
# NOTE(review): the per-file descriptions below are inferred from the
# constant names only; confirm against the indexer before relying on them.
FAST_ADD = 'fast_add.v1'                    # presumably: pending additions
FAST_REMOVE = 'fast_remove.v1'              # presumably: pending removals
MANIFEST_LIST = 'manf_list.v1'              # presumably: list of manifests
FULL_FMRI_FILE = 'full_fmri_list'           # presumably: all indexed FMRIs
MAIN_FILE = 'main_dict.ascii.v2'            # main dictionary file
BYTE_OFFSET_FILE = 'token_byte_offset.v1'   # presumably: token -> offset map
FULL_FMRI_HASH_FILE = 'full_fmri_list.hash'  # presumably: hash of FMRI list
945631faab2bf1aed8d95a1fd0c317a9ce153725Timo Sirainen
def consistent_open(data_list, directory, timeout = 1):
    """Opens all data holders in data_list and ensures that the
    versions are consistent among all of them.

    Each holder must provide get_file_name(), set_file_handle() and
    close_file_handle().  The holder's file is opened (binary mode) from
    'directory' and its first line, of the form "VERSION: <n>", is read.
    The call succeeds when either every file is absent, or every file is
    present and carries the same version number.  Any other combination
    closes all handles and retries the whole pass, because it may be the
    result of a race between file migration and open.

    Returns the common version number, or None when none of the files
    exist.  On a successful non-None return every holder has been given
    an open handle positioned just after its version line.

    Raises search_errors.InconsistentIndexException once more than
    'timeout' seconds have been spent (timeout=None retries forever).
    An IOError other than ENOENT, or a malformed version line
    (IndexError/ValueError), propagates after all handles are closed.

    Note: Do not set timeout to be 0. It will cause an exception to be
    immediately raised.  An empty data_list can never satisfy either
    success condition, so it also ends in the timeout exception.
    """

    def close_all():
        # Release every handle handed out so far so that a retry (or an
        # error exit) starts from a clean state.
        for holder in data_list:
            holder.close_file_handle()

    # missing: None = nothing seen yet, False = files present so far,
    #          True = files absent so far.
    missing = None
    cur_version = None

    start_time = time.time()

    while cur_version is None and missing is not True:
        # The assignments to cur_version and missing cannot be placed
        # here.  They must be reset prior to breaking out of the for
        # loop so that the while loop condition will be true.  They
        # cannot be placed after the for loop since that path is taken
        # when all files are missing or opened successfully.
        if timeout is not None and (time.time() - start_time) > timeout:
            raise search_errors.InconsistentIndexException(directory)
        for d in data_list:
            # All indexes must have the same version and all must
            # either be present or absent for a successful return.
            # If one of these conditions is not met, the function
            # tries again until it succeeds or the time spent in
            # the function is greater than timeout.
            try:
                f = os.path.join(directory, d.get_file_name())
                fh = open(f, 'rb')
                # If we get here, then the current index file is present.
                if missing is None:
                    missing = False
                elif missing:
                    # An earlier file was absent but this one exists.
                    # fh has not been handed to its holder yet, so it
                    # must be closed here or it would leak.
                    fh.close()
                    close_all()
                    missing = None
                    cur_version = None
                    break
                d.set_file_handle(fh, f)
                version_tmp = fh.readline()
                # Splitting on any whitespace also discards the
                # trailing newline of the "VERSION: <n>" line.
                version_num = int(version_tmp.split()[1])
                # If this is the first file, set the expected version;
                # otherwise check that the version matches it.
                if cur_version is None:
                    cur_version = version_num
                elif cur_version != version_num:
                    # Got inconsistent versions, so close all files
                    # and try again.
                    close_all()
                    missing = None
                    cur_version = None
                    break
            except IOError as e:
                if e.errno == errno.ENOENT:
                    # If the index file is missing, ensure that
                    # previous files were missing as well.  If not,
                    # try again.
                    if missing is False:
                        close_all()
                        missing = None
                        cur_version = None
                        break
                    missing = True
                else:
                    close_all()
                    raise
            except (IndexError, ValueError):
                # Malformed version line: do not leave handles open.
                close_all()
                raise
    if missing:
        assert cur_version is None
        # The index is missing (ie, no files were present).
        return None
    assert cur_version is not None
    return cur_version
1cad0dd34667548ba39f794ddeb9fc486cf4c666Timo Sirainen
1cad0dd34667548ba39f794ddeb9fc486cf4c666Timo Sirainen
class IndexStoreBase(object):
    """Base class for all data storage used by the indexer and
    queryEngine. All members must have a file name and maintain
    an internal file handle to that file as instructed by external
    calls.
    """

    def __init__(self, file_name):
        """Remember the file name; no file is opened here."""
        self._name = file_name
        self._file_handle = None
        self._file_path = None
        # Size and mtime observed by the last should_reread() call.
        self._size = None
        self._mtime = None

    def get_file_name(self):
        """Return the file name this store was created with."""
        return self._name

    def set_file_handle(self, f_handle, f_path):
        """Attach an already opened file handle and its path.

        Raises RuntimeError if a handle is already attached; the
        caller must close_file_handle() first.
        """
        if self._file_handle:
            raise RuntimeError("setting an extant file handle, "
                "must close first, fp is: " + f_path)
        else:
            self._file_handle = f_handle
            self._file_path = f_path

    def get_file_path(self):
        """Return the path of the attached file, or None."""
        return self._file_path

    def __copy__(self):
        """Return a fresh instance of the same class for the same file
        name; the file handle and cached stat data are not copied.
        """
        return self.__class__(self._name)

    def close_file_handle(self):
        """Closes the file handle and clears it so that it cannot
        be reused.  Does nothing when no handle is attached.
        """
        if self._file_handle:
            self._file_handle.close()
            self._file_handle = None
            self._file_path = None

    def _protected_write_dict_file(self, path, version_num, iterable):
        """Writes the dictionary in the expected format: a
        "VERSION: <version_num>" line followed by str() of each item
        of iterable, one per line, into the file self._name inside
        the directory 'path'.

        Note: Only child classes should call this method.
        """
        version_string = "VERSION: "
        file_handle = open(os.path.join(path, self._name), 'wb')
        try:
            file_handle.write(version_string + str(version_num) + "\n")
            for name in iterable:
                file_handle.write(str(name) + "\n")
        finally:
            # Close even when a write or str() fails so the handle
            # is not leaked.
            file_handle.close()

    def should_reread(self):
        """This method uses the modification time and the file size
        to (heuristically) determine whether the file backing this
        storage has changed since it was last read.

        Returns True and records the new mtime/size when either
        differs from the values recorded by the previous call,
        otherwise returns False.  Requires an attached file path,
        since it stats self._file_path.
        """
        stat_info = os.stat(self._file_path)
        if self._mtime != stat_info.st_mtime or \
            self._size != stat_info.st_size:
            self._mtime = stat_info.st_mtime
            self._size = stat_info.st_size
            return True
        return False

    def open(self, directory):
        """This uses consistent open to ensure that the version line
        processing is done consistently and that only a single function
        actually opens files stored using this class.

        Returns whatever consistent_open returns for this single
        store: the version number, or None when the file is absent.
        """
        return consistent_open([self], directory)
9315dd69233d554452df0c12bc57002d2042a8f4Timo Sirainen
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen
class IndexStoreMainDict(IndexStoreBase):
    """Class for representing the main dictionary file
    """
    # Here is an example of a line from the main dictionary, it is
    # explained below:
    # %gconf.xml (5,3,65689 => 249,202) (5,3,65690 => 249,202)
    # (5,3,65691 => 249,202) (5,3,65692 => 249,202)
    #
    # The main dictionary has a more complicated format. Each line begins
    # with a search token (%gconf.xml) followed by a list of mappings. Each
    # mapping takes a token_type, action, and keyvalue tuple ((5,3,65689),
    # (5,3,65690), (5,3,65691), (5,3,65692)) to a list of pkg-stem, version
    # pairs (249,202) in which the token is found in an action with
    # token_type, action, and keyvalues matching the tuple. Further
    # compaction is gained by storing everything but the token as an id
    # which the other dictionaries can turn into human-readable content.
    #
    # In short, the definition of a main dictionary entry is:
    # Note: "(", ")", and "=>" actually appear in the file
    #       "[", "]", and "+" are used to specify pattern
    # token [(token_type_id, action_id, keyval_id => [pkg_stem_id,version_id ]+)]+
    #
    # NOTE(review): the notation above does not match the code below.
    # The reader and writer in this class nest entries using the
    # separators " ", "!", "@", "#" and "," (outermost first), and
    # URL-quote the token and the level introduced by "@".

    def __init__(self, file_name):
        IndexStoreBase.__init__(self, file_name)
        # Name of the file left behind by the previous shift_file()
        # call, or None if shift_file() has not been called.
        self._old_suffix = None

    def write_dict_file(self, path, version_num):
        """This class relies on external methods to write the file.
        Making this empty call to protected_write_dict_file allows the
        file to be set up correctly with the version number stored
        correctly.
        """
        IndexStoreBase._protected_write_dict_file(self, path,
            version_num, [])

    def get_file_handle(self):
        """Return the file handle. Note that doing
        anything other than sequential reads or writes
        to or from this file_handle may result in unexpected
        behavior. In short, don't use seek.
        """
        return self._file_handle

    @staticmethod
    def __parse_main_dict_line_help(split_chars, unquote_list, line):
        """Recursively split 'line' on split_chars[0], then the pieces
        on split_chars[1], and so on.

        unquote_list[i] says whether the header found at nesting
        level i is URL-unquoted.  Returns (header, [children]) while
        separators remain, and the (possibly unquoted) string itself
        at the innermost level.
        """
        if not split_chars:
            # Innermost level: 'line' is a leaf value.
            if not line:
                raise search_errors.EmptyMainDictLine(
                    split_chars, unquote_list)
            elif not unquote_list:
                raise search_errors.EmptyUnquoteList(
                    split_chars, line)
            else:
                assert len(unquote_list) == 1
                if unquote_list[0]:
                    return urllib.unquote(line)
                else:
                    return line
        else:
            cur_char = split_chars[0]
            tmp = line.split(cur_char)
            if unquote_list[0]:
                header = urllib.unquote(tmp[0])
            else:
                header = tmp[0]
            return (header, [
                IndexStoreMainDict.__parse_main_dict_line_help(
                    split_chars[1:], unquote_list[1:], x)
                for x
                in tmp[1:]])

    @staticmethod
    def parse_main_dict_line(line):
        """Parses one line of a main dictionary file.
        Changes to this function must be paired with changes to
        write_main_dict_line below.

        Returns a nested (header, [children]) structure whose
        outermost header is the unquoted token.
        """

        line = line.rstrip('\n')
        # One more unquote flag than separators: the last flag applies
        # to the leaf values.
        return IndexStoreMainDict.__parse_main_dict_line_help(
            [" ", "!", "@", "#", ","],
            [True, False, False, True, False, False], line)

    @staticmethod
    def __write_main_dict_line_help(file_handle, sep_chars, quote, entries):
        """Write 'entries' to file_handle, prefixing each nesting level
        with the matching separator from sep_chars and URL-quoting the
        levels flagged in 'quote'.  A non-tuple entry is a leaf.
        """
        assert sep_chars
        if not isinstance(entries, tuple):
            assert len(sep_chars) == 1
            file_handle.write(sep_chars[0])
            if quote[0]:
                file_handle.write(urllib.quote(str(entries)))
            else:
                file_handle.write(str(entries))
            return
        header, entries = entries
        file_handle.write(sep_chars[0])
        if quote[0]:
            file_handle.write(urllib.quote(str(header)))
        else:
            file_handle.write(str(header))
        for e in entries:
            IndexStoreMainDict.__write_main_dict_line_help(
                file_handle, sep_chars[1:], quote[1:], e)

    @staticmethod
    def write_main_dict_line(file_handle, token, lst):
        """Paired with parse_main_dict_line above.  Writes
        a line in a main dictionary file in the appropriate format.
        """
        # The leading "" separator is for the token itself.
        IndexStoreMainDict.__write_main_dict_line_help(file_handle,
            ["", " ", "!", "@", "#", ","],
            [True, False, False, True, False, False], (token, lst))
        file_handle.write("\n")

    @staticmethod
    def __transform_main_dict_line_help(sep_chars, quote, entries):
        """Same traversal as __write_main_dict_line_help, but returns
        the pieces of the line as a list of strings instead of writing
        them.
        """
        assert sep_chars
        ret = [sep_chars[0]]
        if not isinstance(entries, tuple):
            assert len(sep_chars) == 1
            if quote[0]:
                ret.append(urllib.quote(str(entries)))
            else:
                ret.append(str(entries))
            return ret
        header, entries = entries
        if quote[0]:
            ret.append(urllib.quote(str(header)))
        else:
            ret.append(str(header))
        for e in entries:
            tmp = \
                IndexStoreMainDict.__transform_main_dict_line_help(
                    sep_chars[1:], quote[1:], e)
            ret.extend(tmp)

        return ret

    @staticmethod
    def transform_main_dict_line(token, lst):
        """Paired with parse_main_dict_line above.  Returns, as a
        single newline-terminated string, the line that
        write_main_dict_line would write for the same arguments.
        """
        tmp = IndexStoreMainDict.__transform_main_dict_line_help(
            ["", " ", "!", "@", "#", ","],
            [True, False, False, True, False, False], (token, lst))
        tmp.append("\n")
        return "".join(tmp)

    def count_entries_removed_during_partial_indexing(self):
        """Returns the number of entries removed during a second phase
        of indexing.
        """
        # This returns 0 because this class is not responsible for
        # storing anything in memory.
        return 0

    def shift_file(self, use_dir, suffix):
        """Moves the existing file with self._name in directory
        use_dir to a new file named self._name + suffix in directory
        use_dir. If it has done this previously, it removes the old
        file it moved. It also opens the newly moved file and uses
        that as the file for its file handle.

        Must be called with no file handle attached.
        """
        assert self._file_handle is None
        orig_path = os.path.join(use_dir, self._name)
        new_path = os.path.join(use_dir, self._name + suffix)
        portable.rename(orig_path, new_path)
        tmp_name = self._name
        # open() looks the file up by self._name, so point it at the
        # moved file for the duration of the call only.
        self._name = self._name + suffix
        try:
            self.open(use_dir)
        finally:
            # Restore the real name even if the open fails.
            self._name = tmp_name
        if self._old_suffix is not None:
            os.remove(os.path.join(use_dir, self._old_suffix))
        # Despite its name this holds the full file name of the moved
        # file, which is what the next call removes.
        self._old_suffix = self._name + suffix
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen
fa5957ffc9b676bfd649fa9953e63e72ee4ebeb4Timo Sirainen
class IndexStoreListDict(IndexStoreBase):
    """Used when both a list and a dictionary are needed to store the
    information.  Used for bidirectional lookup when one item is an int
    (an id) and the other is not (an entity).  It maintains a list of
    empty spots in the list so that adding entities can take advantage
    of unused space.  It encodes empty space as a blank line in the file
    format and '' in the internal list.
    """

    def __init__(self, file_name, build_function=None):
        """file_name is handed to IndexStoreBase.__init__.  If
        build_function is given, read_dict_file calls it on every
        non-blank line and stores its result as the entity.
        """
        IndexStoreBase.__init__(self, file_name)
        # id -> entity ('' marks an unused slot).
        self._list = []
        # entity -> id.
        self._dict = {}
        # Always kept equal to len(self._list).
        self._next_id = 0
        # ids whose slot in self._list may be reused.
        self._list_of_empties = []
        self._build_func = build_function
        self._line_cnt = 0

    def add_entity(self, entity, is_empty):
        """Adds an entity consistently to the list and dictionary
        allowing bidirectional lookup.

        When is_empty is false and a reusable id is available, that id
        is taken from the front of the list of empties; otherwise the
        entity is appended and gets the next fresh id.  When is_empty
        is true the entity is only placed in the list, not in the
        dictionary.  Returns the id used.
        """
        assert (len(self._list) == self._next_id)
        if self._list_of_empties and not is_empty:
            use_id = self._list_of_empties.pop(0)
            assert use_id <= len(self._list)
            if use_id == len(self._list):
                self._list.append(entity)
                self._next_id += 1
            else:
                self._list[use_id] = entity
        else:
            use_id = self._next_id
            self._list.append(entity)
            self._next_id += 1
        if not is_empty:
            self._dict[entity] = use_id
        assert (len(self._list) == self._next_id)
        return use_id

    def remove_id(self, in_id):
        """Blanks out in_id in the list and its entity in the dictionary.

        The list slot and the dictionary value are both set to ''; the
        dictionary key itself is kept.
        """
        entity = self._list[in_id]
        self._list[in_id] = ""
        self._dict[entity] = ""

    def remove_entity(self, entity):
        """Blanks out the entity in the dictionary and its id in the list.

        The dictionary value and the list slot are both set to ''; the
        dictionary key itself is kept.
        """
        in_id = self._dict[entity]
        self._dict[entity] = ""
        self._list[in_id] = ""

    def get_id(self, entity):
        """returns the id of entity """
        return self._dict[entity]

    def get_id_and_add(self, entity):
        """Adds entity if it's not previously stored and returns the
        id for entity.
        """
        # This code purposefully reimplements add_entity
        # code. Replacing the function calls to has_entity, add_entity,
        # and get_id with direct access to the data structure gave a
        # speed up of a factor of 4. Because this is a very hot path,
        # the tradeoff seemed appropriate.

        if entity not in self._dict:
            assert (len(self._list) == self._next_id)
            if self._list_of_empties:
                use_id = self._list_of_empties.pop(0)
                assert use_id <= len(self._list)
                if use_id == len(self._list):
                    self._list.append(entity)
                    self._next_id += 1
                else:
                    self._list[use_id] = entity
            else:
                use_id = self._next_id
                self._list.append(entity)
                self._next_id += 1
            self._dict[entity] = use_id
        assert (len(self._list) == self._next_id)
        return self._dict[entity]

    def get_entity(self, in_id):
        """return the entity in_id maps to """
        return self._list[in_id]

    def has_entity(self, entity):
        """check if entity is in storage """
        return entity in self._dict

    def has_empty(self):
        """Check if the structure has any empty elements which
        can be filled with data.
        """
        return (len(self._list_of_empties) > 0)

    def get_next_empty(self):
        """Removes and returns an id which maps to no element.

        The id is taken from the end of the list of empties.
        """
        return self._list_of_empties.pop()

    def write_dict_file(self, path, version_num):
        """Passes self._list to the parent class to write to a file.
        """
        IndexStoreBase._protected_write_dict_file(self, path,
            version_num, self._list)

    def read_dict_file(self):
        """Reads in a dictionary previously stored using
        write_dict_file and returns the number of lines read.

        The file is only processed when should_reread() says so;
        otherwise the line count of the previous read is returned.
        """
        assert self._file_handle
        if self.should_reread():
            # Drop everything derived from the previous read.  The
            # list of empties and the counters must be reset too,
            # otherwise a re-read accumulates stale reusable ids and
            # an empty file leaves _next_id out of step with _list.
            self._dict.clear()
            self._list = []
            del self._list_of_empties[:]
            self._line_cnt = 0
            self._next_id = 0
            for i, line in enumerate(self._file_handle):
                # A blank line means that id can be reused.
                tmp = line.rstrip('\n')
                if line == '\n':
                    self._list_of_empties.append(i)
                else:
                    if self._build_func:
                        tmp = self._build_func(tmp)
                    self._dict[tmp] = i
                self._list.append(tmp)
                self._line_cnt = i + 1
                self._next_id = i + 1
        return self._line_cnt

    def count_entries_removed_during_partial_indexing(self):
        """Returns the number of entries removed during a second phase
        of indexing.  This is reported as the length of the internal
        list.
        """
        return len(self._list)
a5b331e18b220fac557480b569b85215a1b3bd8eTimo Sirainen
class IndexStoreDict(IndexStoreBase):
    """Dictionary-only store.  As filled by the read methods below, the
    dictionary is keyed by zero-based line number and holds the text of
    that line with its trailing newline removed.
    """

    def __init__(self, file_name):
        IndexStoreBase.__init__(self, file_name)
        self._dict = {}
        self._next_id = 0

    def get_dict(self):
        """Returns the internal dictionary itself (not a copy)."""
        return self._dict

    def get_entity(self, in_id):
        """Returns the value stored under in_id."""
        return self._dict[in_id]

    def has_entity(self, entity):
        """Checks whether entity is a key of the internal dictionary."""
        return entity in self._dict

    def read_dict_file(self):
        """Reads in a dictionary stored in line number -> entity
        format.  Does nothing unless should_reread() is true.
        """
        if not self.should_reread():
            return
        self._dict.clear()
        for line_no, text in enumerate(self._file_handle):
            self._dict[line_no] = text.rstrip('\n')

    def matching_read_dict_file(self, in_set, update=False):
        """Rereads the file when update is true or should_reread()
        says it is necessary.  Only lines whose number is in in_set
        are stored; all others are skipped.  Reading stops as soon as
        as many lines have been matched as in_set has members.  When
        update is true the existing contents are kept and added to,
        otherwise they are discarded first.
        """
        if not (update or self.should_reread()):
            return
        if not update:
            self._dict.clear()
        found = 0
        wanted = len(in_set)
        handle = self._file_handle
        handle.seek(0)
        # skip the version line
        handle.next()
        for idx, raw in enumerate(handle):
            if idx in in_set:
                found += 1
                self._dict[idx] = raw.rstrip('\n')
            if found >= wanted:
                break

    def count_entries_removed_during_partial_indexing(self):
        """Returns the number of entries removed during a second phase
        of indexing, reported as the size of the dictionary.
        """
        return len(self._dict)
a5b331e18b220fac557480b569b85215a1b3bd8eTimo Sirainen
class IndexStoreDictMutable(IndexStoreBase):
    """Dictionary which allows dynamic update of its storage.

    Each record in the file is "<flag><token> <number>": flag "1" means
    the token was URL-quoted because it contained a space, flag "0"
    means the token is stored verbatim.
    """

    def __init__(self, file_name):
        IndexStoreBase.__init__(self, file_name)
        self._dict = {}

    def get_dict(self):
        """Returns the internal dictionary itself (not a copy)."""
        return self._dict

    def has_entity(self, entity):
        """Checks whether entity is a key of the dictionary."""
        return entity in self._dict

    def get_id(self, entity):
        """Returns the number stored for entity."""
        return self._dict[entity]

    def get_keys(self):
        """Returns the keys of the dictionary."""
        return self._dict.keys()

    @staticmethod
    def __unquote(text):
        """Strips the one-character flag and, for flag "1", undoes the
        URL-quoting of the remainder.
        """
        payload = text[1:]
        if text[0] == "1":
            return urllib.unquote(payload)
        return payload

    @staticmethod
    def __quote(text):
        """Prefixes text with its flag, URL-quoting it first when it
        contains a space.
        """
        if " " not in text:
            return "0" + text
        return "1" + urllib.quote(text)

    def read_dict_file(self):
        """Reads in a dictionary stored with an entity and its number
        on each line.  Does nothing unless should_reread() is true.
        """
        if not self.should_reread():
            return
        self._dict.clear()
        for record in self._file_handle:
            fields = record.split(" ")
            key = self.__unquote(fields[0])
            self._dict[key] = int(fields[1])

    def open_out_file(self, use_dir, version_num):
        """Writes the initial file via write_dict_file, then reopens
        it in append mode so write_entity can add records to it.
        """
        self.write_dict_file(use_dir, version_num)
        out_path = os.path.join(use_dir, self._name)
        self._file_handle = open(out_path, 'ab', buffering=131072)

    def write_entity(self, entity, my_id):
        """Writes the entity out to the file with my_id """
        assert self._file_handle is not None
        quoted = self.__quote(str(entity))
        self._file_handle.write(quoted + " " + str(my_id) + "\n")

    def write_dict_file(self, path, version_num):
        """Asks the parent's _protected_write_dict_file to write the
        file with an empty list of entries.
        """
        no_entries = []
        IndexStoreBase._protected_write_dict_file(self, path,
            version_num, no_entries)

    def count_entries_removed_during_partial_indexing(self):
        """Returns the number of entries removed during a second phase
        of indexing; always 0 for this class.
        """
        return 0
e26a771fad55dfba4d5021d12ed5685c951d9b7bTimo Sirainen
class IndexStoreSetHash(IndexStoreBase):
    """Stores a single SHA hex digest computed over a collection of
    values, and can compare it against a freshly computed one.
    """

    def __init__(self, file_name):
        IndexStoreBase.__init__(self, file_name)
        # Start out with the digest of no input at all.
        self.hash_val = sha.new().hexdigest()

    def set_hash(self, vals):
        """Set the hash value."""
        self.hash_val = self.calc_hash(vals)

    def calc_hash(self, vals):
        """Calculate the hash value of the sorted members of vals."""
        digest = sha.new()
        for member in sorted(vals):
            digest.update(member)
        return digest.hexdigest()

    def write_dict_file(self, path, version_num):
        """Write self.hash_val out to a line in a file """
        one_line = [self.hash_val]
        IndexStoreBase._protected_write_dict_file(self, path,
            version_num, one_line)

    def read_dict_file(self):
        """Process a dictionary file written using write_dict_file.

        At most one line is expected; its stripped content becomes
        hash_val.  Returns the index of the last line read (0).
        """
        assert self._file_handle
        last_idx = 0
        for last_idx, text in enumerate(self._file_handle):
            # More than one line means the file is not ours.
            assert last_idx < 1
            self.hash_val = text.rstrip()
        return last_idx

    def check_against_file(self, vals):
        """Check the hash value of vals against the value stored
        in the file for this object.  Raises
        search_errors.IncorrectIndexFileHash when they differ."""
        incoming_hash = self.calc_hash(vals)
        if self.hash_val == incoming_hash:
            return
        raise search_errors.IncorrectIndexFileHash(
            self.hash_val, incoming_hash)

    def count_entries_removed_during_partial_indexing(self):
        """Returns the number of entries removed during a second phase
        of indexing; always 0 for this class."""
        return 0
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen
class IndexStoreSet(IndexStoreBase):
    """Used when only set membership is desired.
    This is currently designed for exclusive use
    with storage of fmri.PkgFmris. However, that impact
    is only seen in the read_and_discard_matching_from_argument
    method.
    """

    def __init__(self, file_name):
        IndexStoreBase.__init__(self, file_name)
        self._set = set()

    def get_set(self):
        """Returns the internal set itself (not a copy)."""
        return self._set

    def add_entity(self, entity):
        """Adds entity to the set."""
        self._set.add(entity)

    def remove_entity(self, entity):
        """Remove entity purposefully assumes that entity is
        already in the set to be removed. This is useful for
        error checking and debugging.
        """
        self._set.remove(entity)

    def has_entity(self, entity):
        """Checks whether entity is a member of the set."""
        return (entity in self._set)

    def write_dict_file(self, path, version_num):
        """Write each member of the set out to a line in a file """
        members = self._set
        IndexStoreBase._protected_write_dict_file(self, path,
            version_num, members)

    def read_dict_file(self):
        """Process a dictionary file written using write_dict_file and
        return the number of lines read (0 when no re-read was needed).
        """
        assert self._file_handle
        count = 0
        if not self.should_reread():
            return count
        self._set.clear()
        for idx, raw in enumerate(self._file_handle):
            member = raw.rstrip('\n')
            # Every earlier line must have added a distinct member.
            assert idx == len(self._set)
            self.add_entity(member)
            count = idx + 1
        return count

    def read_and_discard_matching_from_argument(self, fmri_set):
        """Reads the file and removes all fmris in the file
        from fmri_set.  Does nothing when no file handle is open.
        """
        if not self._file_handle:
            return
        for raw in self._file_handle:
            fmri_set.discard(fmri.PkgFmri(raw))

    def count_entries_removed_during_partial_indexing(self):
        """Returns the number of entries removed during a second phase
        of indexing, reported as the size of the set."""
        return len(self._set)
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen