#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
#
import os
import fnmatch
import re
import six
import sys
import threading
import copy
import itertools
import errno
"""This class defines the lexer used to separate parse queries into
its constituents. It's written for Ply, a python implementation for
lex and yacc."""
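# A minimal, standalone sketch (illustrative only; assumes the
# third-party "ply" package) of the convention this lexer follows:
# Ply builds a tokenizer from a class's "tokens" tuple and its t_*
# rules, using each rule function's docstring as that token's regex.
#
#   import ply.lex as lex
#
#   class MiniLexer(object):
#       tokens = ("WORD",)
#       t_ignore = " "
#       def t_WORD(self, t):
#           r"[^\s]+"
#           return t
#       def t_error(self, t):
#           t.lexer.skip(1)
#       def build(self, **kwargs):
#           self.lexer = lex.lex(object=self, **kwargs)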
# These are the types of tokens that the lexer can produce.
tokens = ("FTERM", "TERM", "LPAREN", "RPAREN", "AND", "OR",
    "QUOTE1", "QUOTE2", "LBRACE", "RBRACE")
# These statements define the lexing rules for (, ), ', and ". These
# are all checked after all lexing defined in functions below.
t_LPAREN = r"\("
t_RPAREN = r"\)"
t_QUOTE1 = r"[\']"
t_QUOTE2 = r'[\"]'

# This rule causes spaces to break tokens, but the space itself is not
# reported as a token.
t_ignore = " "
# Set up a dictionary of strings which have special meaning and
# are otherwise indistinguishable from normal terms. The
# mapping is from the string seen in the query to the lexer
# tokens.
reserved = {
    "AND" : "AND",
    "OR" : "OR"
}
# Note: Functions are documented using comments instead of docstrings
# below because Ply uses the docstrings for specific purposes.
# This rule is for lexing the left side of the pkgs<>
# construction.
def t_LBRACE(self, t):
    r"([pP]([kK]([gG][sS]?)?)?)?<"
    t.type = "LBRACE"
    return t
# This rule is for lexing the right side of the pkgs<>
# construction.
def t_RBRACE(self, t):
    r">"
    t.type = "RBRACE"
    return t
# This rule handles valid search terms with a colon in them. If
# all colons are escaped, it produces a TERM token whose value
# is the original term with the escape characters removed. If
# there are unescaped colons, it produces an FTERM token whose
# value is a four-tuple consisting of the pkg name, the action
# type, the action key, and the token that followed the last
# unescaped colon.
#
# The following regular expression matches a string with a colon
# in it, subject to certain other restrictions, such as not
# beginning with a quote. It consists of three parts: the part
# before the colon, the colon, and the part after the colon.
# The part before the colon attempts to match anything that
# could come before a colon acting as a field delimiter or
# the escaped colon in a token. The colon matches either a
# colon acting as a field separator or an escaped colon in a
# token or field. The part after the colon attempts to
# match the valid term that can follow a colon that's either
# a field separator or escaped as part of a term.
r"([^\'\"\(\s][^\s]*)?\:([^\s\(\'\"][^\s]*[^\s\)\'\"\>]|[^\s\(\)\'\"\>])?"
else:
# If the last item in the list is not the empty string, then
# it's possible that there was no field query and that all the
# colons were escaped. In that case, treat the item as a TERM
# rather than an FTERM.
# If an unescaped colon was in the term, then this was actually
# a FTERM, so fill in the fields and set the type to FTERM.
else:
action_type = ""
pkg_name = ""
t.type = "FTERM"
return t
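# Worked example of the splitting above (illustrative):
#   "foo:bar:baz:zap" lexes to an FTERM whose value is the four-tuple
#   ("foo", "bar", "baz", "zap"), while r"file\:name" lexes to a TERM
#   whose value is "file:name" (the escape character is removed).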
# This rule handles the general search terms as well as
# checking for any reserved words such as AND or OR.
def t_TERM(self, t):
    r'[^\s\(\'\"][^\s]*[^\s\)\'\"\>]|[^\s\(\)\'\"]'
    t.type = self.reserved.get(t.value, "TERM")
    return t
[_("An unparseable character in query at position : {0:d}").format(
"""This is a function useful for testing and debugging as it
shows the user exactly which tokens are produced from the input
data."""
while 1:
if not tok:
break
"""This class defines the parser which converts a stream of tokens into
an abstract syntax tree representation of the query. The AST is able
to perform the search."""
# Use the same set of tokens as the lexer.
# Define the precedence and associativity of certain tokens to
# eliminate ambiguity in the grammar.
precedence = (
("right", "FTERM"),
("left", "TERM"),
("right", "AND", "OR"),
("right", "QUOTE1", "QUOTE2"),
)
# Note: Like the lexer, Ply uses the docstrings of the functions to
# determine the rules.
# This is the top or start node of the AST.
def p_start(self, p):
    '''start : xterm'''
    p[0] = self.query_objs["TopQuery"](p[1])
# This rule parses xterms. xterms are terms which can connect
# smaller terms together.
def p_xterm(self, p):
    ''' xterm : basetermlist
        | andterm
        | orterm'''
    p[0] = p[1]
# basetermlist handles performing the implicit AND operator
# which is placed between a list of terms. For example, the
# query 'foo bar' is treated the same as 'foo AND bar'.
def p_basetermlist(self, p):
    ''' basetermlist : baseterm
        | baseterm basetermlist '''
    if len(p) == 3:
        p[0] = self.query_objs["AndQuery"](p[1], p[2])
    else:
        p[0] = p[1]
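# Worked example (assuming the default query_objs classes): the
# query 'foo bar baz' parses right-nested as
# AndQuery(TermQuery('foo'), AndQuery(TermQuery('bar'),
# TermQuery('baz'))), exactly as if the user had written
# 'foo AND bar AND baz'.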
# baseterms are the minimal units of meaning for the parser.
# Any baseterm is a valid query unto itself.
def p_baseterm(self, p):
    ''' baseterm : term
        | fterm
        | phraseterm
        | parenterm
        | pkgconv'''
    p[0] = p[1]
# terms are the parser's representation of the lexer's TERMs.
# The TermQuery object performs most of the work of actually
# performing the search.
def p_term(self, p):
    'term : TERM'
    p[0] = self.query_objs["TermQuery"](p[1])
# fterms are the parser's representation of the lexer's FTERMs
# (which are field/structured query terms). In the query
# 'foo:bar:baz:zap', foo, bar, and baz are part of the FTERM;
# zap is split into a separate lexer TERM token, and flows
# up from parsing the ftermarg. A query like 'foo:bar:baz:' has
# no ftermarg though. In that case, the implicit wildcard is
# made explicit.
def p_fterm(self, p):
    '''fterm : FTERM ftermarg
        | FTERM fterm
        | FTERM'''
    pkg_name, action_type, key, token = p[1]
    fields = (pkg_name, action_type, key)
    # If the len of p is 3, then one of the first two cases
    # was used.
    if len(p) == 3:
        # If no token was attached to the FTERM, then attach
        # the term found following it. If a token was attached
        # to the FTERM, then the following term is treated like
        # a basetermlist.
        if token == "":
            p[0] = self.query_objs["FieldQuery"](
                fields, p[2])
        else:
            p[0] = self.query_objs["AndQuery"](
                self.query_objs["FieldQuery"](fields,
                    self.query_objs["TermQuery"](token)),
                p[2])
    # If the length of p isn't 3, then a bare FTERM was found. If
    # no token was attached to the FTERM, it's necessary to make
    # the implicit wildcard explicit.
    else:
        if token == "":
            token = "*"
        p[0] = self.query_objs["FieldQuery"](fields,
            self.query_objs["TermQuery"](token))
# ftermargs are the terms which are valid after the final
# colon of a field term.
def p_ftermarg(self, p):
    '''ftermarg : term
        | phraseterm'''
    p[0] = p[1]
# phraseterms are lists of terms enclosed by quotes.
def p_phraseterm(self, p):
    ''' phraseterm : QUOTE1 term_list QUOTE1
        | QUOTE2 term_list QUOTE2'''
    p[2].reverse()
    p[0] = self.query_objs["PhraseQuery"](p[2],
        self.query_objs["TermQuery"])
# term_lists consist of one or more space separated TERMs. The
# list is built back to front and reversed by phraseterm above.
def p_term_list(self, p):
    ''' term_list : TERM term_list
        | TERM '''
    if len(p) == 3:
        p[0] = p[2]
        p[0].append(p[1])
    else:
        p[0] = [p[1]]
# parenterms contain a single xterm surrounded by parens.
# The p[2] argument is simply passed on because the only
# role of parens is to perform grouping, which is enforced
# by the structure of the AST.
def p_parenterm(self, p):
    ''' parenterm : LPAREN xterm RPAREN '''
    p[0] = p[2]
# pkgconv represents the pkgs<> term.
def p_pkgconv(self, p):
    'pkgconv : LBRACE xterm RBRACE'
    p[0] = self.query_objs["PkgConversion"](p[2])
# andterms perform an intersection of the results of their
# two children.
def p_andterm(self, p):
    ''' andterm : xterm AND xterm'''
    p[0] = self.query_objs["AndQuery"](p[1], p[3])

# orterms return the union of the results of their
# two children.
def p_orterm(self, p):
    ''' orterm : xterm OR xterm'''
    p[0] = self.query_objs["OrQuery"](p[1], p[3])
"""Build a parser using the lexer given as an argument."""
# Store the classes used to build the AST so that child classes
# can replace them where needed with alternate classes.
self.query_objs = {
"AndQuery" : AndQuery,
"FieldQuery" : FieldQuery,
"OrQuery" : OrQuery,
"PhraseQuery" : PhraseQuery,
"PkgConversion" : PkgConversion,
"TermQuery" : TermQuery,
"TopQuery" : TopQuery
}
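# Illustrative use of query_objs (the subclass and substituted class
# names here are hypothetical, and the parser class is assumed to be
# named QueryParser): a derived parser can swap in its own AST classes
# without touching any grammar rules.
#
#   class ClientQueryParser(QueryParser):
#       def __init__(self, lexer):
#           QueryParser.__init__(self, lexer)
#           self.query_objs["TermQuery"] = ClientTermQuery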
"""Parse the string, input, into an AST using the rules defined
in this class."""
pass
return _("The number of terms in the query is {len:d}, "
"which exceeds the maximum supported "
return _("In query {query}, {name} had a bad value of "
"'{bv}'.").format(
)
return _("A query is expected to have five fields: "
"case sensitivity, return type, number of results to "
"return, the number at which to start returning results, "
"and the text of the query. The query provided lacked at "
self.p = parse_object
# BUI will interpret a line starting with a \t as pre-formatted
# and put it in <pre> tags.
"""General Query object. It defines various constants and provides for
marshalling a Query into and out of a string format."""
"""Construct a query object.
The "text" parameter is the tokens and syntax of the query.
The "case_sensitive" parameter is a boolean which determines
whether the query is case sensitive or not.
The "return_type" parameter must be either RETURN_PACKAGES or
RETURN_ACTIONS and determines whether the query is expected to
return packages or actions to the querier.
The "num_to_return" paramter is the maximum number of results to
return.
The "start_point" parameter is the number of results to skip
before returning results to the querier."""
if token_cnt > MAX_TOKEN_COUNT:
raise QueryLengthExceeded(token_cnt)
"""Return the v1 string representation of this query."""
@staticmethod
def fromstr(s):
    """Take the output of the __str__ method of this class and
    return a Query object from that string."""
    try:
        case_sensitive, return_type, num_to_return, \
            start_point, text = s.split("_", 4)
    except ValueError:
        raise IncompleteQuery(s)
    if case_sensitive == 'True':
        case_sensitive = True
    elif case_sensitive == 'False':
        case_sensitive = False
    else:
        raise DetailedValueError("case_sensitive",
            case_sensitive, s)
    if num_to_return == 'None':
        num_to_return = None
    else:
        try:
            num_to_return = int(num_to_return)
        except ValueError:
            raise DetailedValueError("num_to_return",
                num_to_return, s)
    if start_point == 'None':
        start_point = None
    else:
        try:
            start_point = int(start_point)
        except ValueError:
            raise DetailedValueError("start_point",
                start_point, s)
    try:
        return_type = int(return_type)
    except ValueError:
        raise DetailedValueError("return_type",
            return_type, s)
    return Query(text, case_sensitive, return_type,
        num_to_return, start_point)
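# Illustrative round trip (the "_"-separated format is assumed from
# the parsing code above; RETURN_* constants are defined on the
# class):
#
#   q = Query("foo AND bar", False, Query.RETURN_PACKAGES, None, None)
#   Query.fromstr(str(q))   # reconstructs an equivalent Query
#
# The text field is split off last with a bounded split, so the query
# text may itself contain underscores.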
@staticmethod
def return_action_to_key(k):
    """Method which produces the sort key for an action."""
    at, st, pfmri, fv, l = k
    return pfmri
"""This exception is used when the two children of a boolean query
don't agree on whether to return actions or packages."""
"""The parameter "ac" is the child which returned actions
while "pc" is the child which returned packages."""
# BUI will interpret a line starting with a \t as pre-formatted
# and put it in <pre> tags.
ac_s = _("This expression produces action results:")
pc_s = _("This expression produces package results:")
_("'AND' and 'OR' require those expressions to produce "
"the same type of results.")])
"""Superclass for all boolean operations in the AST."""
"""The parameters "left_query" and "right_query" are objects
which implement the query interface. Specifically, they're
expected to implement search, allow_version, set_info, and to
have a public member called return_type."""
else:
"""This function passes information to the terms prior to
search being executed. For a boolean query, it only needs to
pass whatever information exists onto its children."""
"""Distributes the search to the two children and returns a
tuple of the results."""
"""Sort the results. If the results are actions, sort by the
fmris of the packages from which they came."""
key = None
"""Returns whether the query supports version v."""
"""Makes each child return packages instead of actions.
If a child returns a value that isn't None, that means a new
node in the tree has been created which needs to become the
new child for this node."""
if new_lc:
if new_rc:
return None
"""Class representing AND queries in the AST."""
"""Performs a search over the two children and combines
the results.
The "restriction" parameter is a generator of actions, over
which the search shall be performed. Only boolean queries will
set restriction. Nodes that return packages have, by
definition, parents that must return packages. This means that
only queries contained within a boolean query higher up in the
AST tree will have restriction set."""
# If actions are being returned, the answers from
# previous terms must be used as the domain of search.
# To do this, restriction is passed to the left
# child and the result from that child is passed to
# the right child as its domain.
else:
# If packages are being returned, holding the names
# of all known packages in memory is feasible. By
# using sets, and their intersection, duplicates are
# also removed from the results.
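# A sketch of the two branches described above (illustrative, not the
# module's exact code; assumes the children live in self.lc and
# self.rc as elsewhere in this file):
#
#   if self.return_type == Query.RETURN_ACTIONS:
#       # Pipe the left child's results into the right child.
#       return self.rc.search(self.lc.search(restriction, *args),
#           *args)
#   else:
#       # Packages fit in memory; set intersection removes
#       # duplicates.
#       return sorted(set(self.lc.search(None, *args)) &
#           set(self.rc.search(None, *args)))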
"""Class representing OR queries in the AST."""
"""Performs a search over the two children and combines
the results.
The "restriction" parameter is a generator function that returns
actions within the search domain. If it's not None,
then this query is under a higher boolean query which also
returns actions."""
if self.return_type == Query.RETURN_PACKAGES:
    # If packages are being returned, it's feasible to
    # hold them all in memory. This allows the use of
    # sets, and their union, to remove duplicates and
    # produce a sorted list.
    for i in sorted(set(self.lc.search(None, *args)) |
            set(self.rc.search(None, *args))):
        yield i
elif not restriction:
    # If restriction doesn't exist, then chain together
    # the results from both children.
    for i in itertools.chain(self.lc.search(None, *args),
            self.rc.search(None, *args)):
        yield i
else:
# If restriction exists, then it must serve as the
# domain for the children. It is a generator so
# only one pass may be made over it. Also, it is not
# possible, in general, to hold a list of all items
# that the generator will produce in memory. These
# reasons lead to the construction below, which iterates
# over the results in restriction and uses each as the
# restriction to the child search. If this turns out
# to be a performance bottleneck, it would be possible
# to gather N of the results in restriction into a list
# and dispatch them to the children in one shot.
# The tradeoff is the memory to hold O(N) results in
# memory at each level of ORs in the AST.
for i in restriction:
    for j in itertools.chain(
            self.lc.search([i], *args),
            self.rc.search([i], *args)):
        yield j
"""Class representing a change from returning actions to returning
packages in the AST."""
"""This function passes information to the terms prior to
search being executed. It only needs to pass whatever
information exists to its child."""
"""Based on the return_type and current type, it converts the
iterator over results, it, to a sorted list of packages.
return_type is what the caller wants to return and current_type
is what it is iterating over."""
return it
else:
assert 0
"""Takes the results of its child's search and converts the
results to be a sorted list of packages.
The "restriction" parameter is a generator of actions which
the domain over which search should be performed. It should
always be None."""
return self.optional_action_to_package(
"""Returns whether the query supports a query of version v."""
"""Makes this node return packages instead of actions.
Returns None because no changes need to be made to the tree."""
return None
"""Class representing a phrase search in the AST"""
"""The "str_list" parameter is the list of strings which make
up the phrase to be searched.
The "term_query_class" parameter is a TermQuery object which
handles searching for the initial word in the phrase."""
assert str_list
self._case_sensitive = None
"""This function passes information to the terms prior to
search being executed. It only needs to pass whatever
information exists to its child."""
if not case_sensitive:
    self.compare_str = self.compare_str.lower()
"""Check to see if the phrase is contained in l, the string of
the original action."""
if self._case_sensitive:
return self.compare_str in l
"""Checks to see if the phrase being searched for is a subtring
of the value which matched the token. If it is, use the value
returned, otherwise use the search phrase."""
return fv
else:
return fs
"""Perform a search for the given phrase. The child is used to
find instances of the first word of the phrase. Those results
are then filtered by searching for the longer phrase within
the original line of the action. restriction is a generator
function that returns actions within the search domain."""
it = (
)
return it
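# Sketch of the generator assembled above (illustrative; assumes the
# child yields (action_type, subtype, fmri, match_value, line) tuples,
# following return_action_to_key earlier in this module):
#
#   it = (
#       (at, st, pfmri, fv, l)
#       for at, st, pfmri, fv, l
#       in self.query.search(restriction, *args)
#       if self.compare_str in
#           (l if self._case_sensitive else l.lower())
#   )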
"""Returns whether the query supports a query of version v."""
"""Inserts a conversion to package results into the tree.
Creates a new node by wrapping a PkgConversion node around
itself. It then returns the new node to its parent for
insertion into the tree."""
return PkgConversion(self)
"""Class representing a structured query in the AST."""
"""Builds a FieldQuery object.
The "params" parameter are the three parts of the structured
search term pulled apart during parsing.
The "query" parameter is the Query object which contains the
fourth field (the token) of the structured search."""
# For efficiency, especially on queries which search over
# '*', instead of filtering the results, this class makes
# modifications to its child class so that it will do the
# needed filtering as it does the search.
return "( PN:{0!r} AT:{1!r} ST:{2!r} Q:{3!r})".format(
"""This function passes information to the terms prior to
search being executed. It only needs to pass whatever
information exists to its child."""
"""Perform a search for the structured query. The child has
been modified so that it is able to do the structured query
directly."""
"""Returns whether the query supports a query of version v."""
"""Inserts a conversion to package results into the tree.
Creates a new node by wrapping a PkgConversion node around
itself. It then returns the new node to its parent for
insertion into the tree."""
return PkgConversion(self)
"""Class which must be at the top of all valid ASTs, and may only be
at the top of an AST. It handles starting N results in, or only
showing M items. It also transforms the internal representations of
results into the format expected by the callers of search."""
self.num_to_return = None
"""Determines whether the x'th result should be returned."""
return x >= self.start_point and \
(self.num_to_return is None or
"""Converts the internal result representation to the format
which is expected by the callers of search. It also handles
returning only those results requested by the user."""
# Need to replace "1" with current search version, or something
# similar
return (
)
else:
return (
for x, pfmri
)
"""This function passes information to the terms prior to
search being executed. This is also where the starting point
and number of results to return is set. Both "num_to_return"
and "start_point" are expected to be integers."""
if start_point:
"""Perform search by taking the result of the child's search
and transforming and subselecting the results. None is passed
to the child since initially there is no set of results to
restrict subsequent searches to."""
"""Returns whether the query supports a query of version v."""
"""Makes the child return packages instead of actions.
If a child returns a value that isn't None, that means a new
node in the tree has been created which needs to become the
new child for this node."""
if new_child:
return None
"""Class representing the a single query term in the AST. This is an
abstract class and should not be used instead of the related client and
server classes."""
# This structure was used to gather all index files into one
# location. If a new index structure is needed, the files can
# be added (or removed) from here. Providing a list or
# dictionary allows an easy approach to opening or closing all
# index files.
__dict_locks = {}
fmris = None
"""term is a the string for the token to be searched for."""
# This block of options is used by FieldQuery to limit the
# domain of search.
self.action_type = None
self.pkg_name_match = None
self._case_sensitive = None
# These variables are set by set_info and are used to hold
# information specific to the particular search index that this
# AST is being built for.
self._manifest_path_func = None
self._data_manf = None
self._data_token_offset = None
self._data_main_dict = None
return
# Set up the default global dictionary for this index path.
}
# This lock is used so that only one instance of a term query
# object is ever modifying the class wide variable for this
# index.
return ":".join([
]
])
"""Inserts a conversion to package results into the tree.
Creates a new node by wrapping a PkgConversion node around
itself. It then returns the new node to its parent for
insertion into the tree."""
return PkgConversion(self)
if wc:
return ""
"""Dump any cached index data for specified index path."""
try:
except KeyError:
pass
finally:
"""Add the information needed to restrict the search domain
to the specified fields."""
# Because users will rarely want to search on package names
# by fully specifying them out to their time stamps, package
# name search is treated as a prefix match. To accomplish
# this, a star is appended to the package name if it doesn't
# already end in one.
if not self.pkg_name_wildcard:
    if not pkg_name.endswith("*"):
        pkg_name += "*"
    self.pkg_name_match = \
        re.compile(fnmatch.translate(pkg_name)).match
@staticmethod
def __is_wildcard(s):
    return s == '*' or s == ''
"""Ensures that the search is a prefix match. Primarily used
by the PhraseQuery class."""
def set_info(self, index_dir, get_manifest_path,
    case_sensitive, **kwargs):
"""Sets the information needed to search which is specific to
the particular index used to back the search.
'index_dir' is a path to the base directory of the index.
'get_manifest_path' is a function which when given a
fully specified fmri returns the path to the manifest file
for that fmri.
'case_sensitive' is a boolean which determines whether search
is case sensitive or not."""
# Take the static class lock because it's possible we'll
# modify the shared dictionaries for the class.
try:
self._data_main_dict = \
if "fmri_offsets" not in tq_gdd:
ss.FMRI_OFFSETS_FILE, None)
# Create a temporary list of dictionaries we need to
# open consistently.
try:
# Try to open the index files assuming they
# were made after the conversion to using the
# fmri_offsets.v1 file.
# If opening the index fails, try falling
# back to the index prior to the conversion
# to using the fmri_offsets.v1 file.
del tq_gdd["fmri_offsets"]
if ret is None:
raise search_errors.NoIndexException(
# Check to see if any of the in-memory stores of the
# dictionaries are out of date compared to the ones
# on disk.
if d.should_reread():
break
try:
# If any of the in-memory dictionaries are out
# of date, create new copies of the shared
# dictionaries and make the shared structure
# point at them. This is to prevent changing
# the data structures in any other threads
# which may be part way through executing a
# search.
if should_reread:
for i in tmp:
dict([
for k, d
])
try:
if ret is None:
raise search_errors.NoIndexException(
# Reread the dictionaries and
# store the new information in
# the shared data structure.
d.read_dict_file()
except:
raise
finally:
# Ensure that the files are closed no matter
# what happens.
None)
finally:
"""Returns whether the query supports a query of version v."""
return True
"""Closes the main dictionary file handle, which is handled
separately from the other dictionaries since it's not read
entirely into memory in one shot."""
"""Takes a list which may contain one or more sublists and
returns a list which contains all the items contained in the
original list and its sublists, but without any sublists."""
res = []
for l in lst:
if isinstance(l, list):
else:
return res
"""Searches for the given term within a restricted domain of
search results. restriction is a generator function that
returns actions within the search domain."""
if not case_sensitive:
# Check if the current action doesn't match any field
# query specifications.
if not (self.pkg_name_wildcard or
not (self.action_type_wildcard or
continue
l = l.strip()
# Find the possible tokens for this action.
matches = []
if glob:
# If this search term matches any of the tokens the
# action could generate, yield it. If not, continue
# on to the next action.
if matches:
"""Takes a group of byte offsets into the main dictionary and
reads the lines starting at those byte offsets."""
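# Sketch of the offset-read pattern this describes (illustrative
# helper, not part of the original module):
#
#   def read_lines_at(fh, offsets):
#       for o in sorted(offsets):
#           fh.seek(o)
#           yield fh.readline()
#
# Sorting the offsets first keeps the reads sequential on disk.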
"""Legacy function used to search indexes which have a pkg
directory with fmri offset information instead of the
fmri_offsets.v1 file."""
pkg_offsets = set()
for matching_pfmri in (
p
if self.pkg_name_match(
):
try:
except EnvironmentError as e:
    # A missing offsets file just means this package
    # contributed no matches; anything else is a real
    # error.
    if e.errno != errno.ENOENT:
        raise
    continue
for l in fh:
return pkg_offsets
"""Searches the indexes in dir_path for any matches of query
and the results in self.res. The method assumes the
dictionaries have already been loaded and read appropriately.
The "fmris" parameter is a generator of fmris of installed
packages."""
if not case_sensitive:
# If offsets is equal to None, match all possible results. A
# match with no results is represented by an empty set.
offsets = None
if glob:
# If the term has at least one non-wildcard character
# in it, do the glob search.
])
else:
# Close the dictionaries since there are
# no more results to yield.
return
# Restrict results by package name.
if not self.pkg_name_wildcard:
try:
pkg_offsets = \
except AttributeError:
if offsets is None:
else:
# Restrict results by action type.
if not self.action_type_wildcard:
try:
for l in fh:
if offsets is None:
else:
except EnvironmentError as e:
    if e.errno != errno.ENOENT:
        raise
    # If the file doesn't exist, then no actions
    # with that action type were indexed.
    offsets = set()
# Restrict results by key.
if not self.key_wildcard:
try:
for l in fh:
if offsets is None:
else:
except EnvironmentError as e:
    if e.errno != errno.ENOENT:
        raise
    # If the file doesn't exist, then no actions
    # with that key were indexed.
    offsets = set()
# If offsets isn't None, then the set of results has been
# restricted so iterate through those offsets.
if offsets is not None:
# If offsets is None and the term was only wildcard search
# tokens, return results for every known token.
elif glob and \
assert line != '\n'
# Check that the token was what was expected.
(not case_sensitive and
(not case_sensitive and
# Check the various action types this token was
# associated with.
if not self.action_type_wildcard and \
continue
# Check the key types this token and action type
# were associated with.
if not self.key_wildcard and \
continue
# Get the values which matched this
# token.
# Get the fmri id and list of
# offsets into the manifest for
# that fmri id.
# Check that the pkg
# name matches the
# restrictions, if any
# exist.
continue
int_os = [
int(o)
for o
in m_off_set
]
# Close the dictionaries since there are no more results to
# yield.
return
"""Takes the results from search_internal ("res") and reads the
lines from the manifest files at the provided offsets."""
send_res = []
# XXX If action size changes substantially, we should
# reexamine whether the buffer size should be changed.
file_handle.seek(o)
file_handle.readline()))
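# End-to-end sketch of how the pieces above fit together (illustrative;
# the class names and build() helper follow the Ply conventions noted
# earlier and are assumptions, not part of this excerpt):
#
#   ql = QueryLexer()
#   ql.build()
#   qp = QueryParser(ql)
#   ast = qp.parse('pkgs<depend:require:>')
#   ast.set_info(...)   # wire in index location, case sensitivity, etc.
#   results = ast.search(None)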