Source code for pyformlang.fcfg.feature_structure

"""Feature Structure"""
from typing import Any, List, Dict


[docs] class ContentAlreadyExistsException(Exception): """Exception raised when we want to add a content that already exists"""
[docs] class PathDoesNotExistsException(Exception): """Raised when looking for a path that does not exist"""
[docs] class FeatureStructuresNotCompatibleException(Exception): """Raised when trying to unify uncompatible structures"""
[docs] class FeatureStructure: """ The feature structure containing constraints Parameters ---------- value : Any, optional The value of the feature, if defined """ def __init__(self, value=None): self._content = {} self._value = value self._pointer = None
[docs] def copy(self, already_copied=None): """Copies the current feature structure Parameters ---------- already_copied : dict A dictionary containing the parts already copied. For internal usage. Returns ---------- fs : :class:`~pyformlang.fcfg.FeatureStructure` The copied feature structure """ if already_copied is None: already_copied = {} if self in already_copied: return already_copied[self] new_fs = FeatureStructure(self.value) if self._pointer is not None: pointer_copy = self._pointer.copy(already_copied) new_fs.pointer = pointer_copy for feature, content in self._content.items(): new_fs.content[feature] = content.copy(already_copied) already_copied[self] = new_fs return new_fs
@property def content(self) -> Any: """Gets the content of the current node""" return self._content @property def pointer(self) -> Any: """Gets the pointer of the current node""" return self._pointer @pointer.setter def pointer(self, new_pointer): """Set the value of the pointer""" self._pointer = new_pointer @property def value(self) -> Any: """Gets the value associated to the current node""" return self._value if self.pointer is None else self.pointer.value @value.setter def value(self, new_value) -> Any: """Gets the value associated to the current node""" self._value = new_value
[docs] def add_content(self, content_name: str, feature_structure: "FeatureStructure"): """Add content to the current feature structure. Parameters ---------- content_name : str The name of the new feature feature_structure : :class:`~pyformlang.fcfg.FeatureStructure` The value of this new feature Raises ---------- ContentAlreadyExistsException When the feature already exists """ if content_name in self._content: raise ContentAlreadyExistsException() self._content[content_name] = feature_structure
[docs] def add_content_path(self, content_name: str, feature_structure: "FeatureStructure", path: List[str]): """Add content to the current feature structure at a specific path Parameters ---------- content_name : str The name of the new feature feature_structure : :class:`~pyformlang.fcfg.FeatureStructure` The value of this new feature path : Iterable of str The path where to add the new feature. Raises ---------- ContentAlreadyExistsException When the feature already exists PathDoesNotExistsException When the path does not exist """ to_modify = self.get_feature_by_path(path) to_modify.add_content(content_name, feature_structure)
[docs] def get_dereferenced(self): """Get the dereferences version of the feature structure. For internal usage.""" return self._pointer.get_dereferenced() if self._pointer is not None else self
[docs] def get_feature_by_path(self, path: List[str] = None): """ Get a feature at a given path. Parameters ----------- path : Iterable of str, optional The path to the new feature. Returns ------- feature_structure : :class:`~pyformlang.fcfg.FeatureStructure` The feature structure at the end of the path. Raises ---------- PathDoesNotExistsException When the path does not exist """ if not path or path is None: return self current = self.get_dereferenced() if path[0] not in current.content: raise PathDoesNotExistsException() return current.content[path[0]].get_feature_by_path(path[1:])
[docs] def unify(self, other: "FeatureStructure"): """Unify the current structure with another one. Modifies the current structure. Parameters ---------- other : :class:`~pyformlang.fcfg.FeatureStructure` The other feature structure to unify. Raises ---------- FeatureStructuresNotCompatibleException When the feature structure cannot be unified. """ current_dereferenced = self.get_dereferenced() other_dereferenced = other.get_dereferenced() if current_dereferenced == other_dereferenced: return if len(current_dereferenced.content) == 0 and len(other_dereferenced.content) == 0: # We have a simple feature if current_dereferenced.value == other_dereferenced.value: current_dereferenced.pointer = other_dereferenced elif current_dereferenced.value is None: current_dereferenced.pointer = other_dereferenced elif other_dereferenced.value is None: other_dereferenced.pointer = current_dereferenced else: raise FeatureStructuresNotCompatibleException() else: other_dereferenced.pointer = current_dereferenced for feature in other_dereferenced.content: if feature not in current_dereferenced.content: current_dereferenced.content[feature] = FeatureStructure() current_dereferenced.content[feature].unify(other_dereferenced.content[feature])
[docs] def subsumes(self, other: "FeatureStructure"): """Check whether the current feature structure subsumes another one. Parameters ---------- other : :class:`~pyformlang.fcfg.FeatureStructure` The other feature structure to unify. Returns ---------- subsumes : bool Whether the current feature structure subsumes the one. """ current_dereferenced = self.get_dereferenced() other_dereferenced = other.get_dereferenced() if current_dereferenced.value != other_dereferenced.value: return False for feature in current_dereferenced.content: if feature not in other_dereferenced.content: return False if not current_dereferenced.content[feature].subsumes(other_dereferenced.content[feature]): return False return True
[docs] def get_all_paths(self): """ Get the list of all path in the feature structure Returns -------- paths : Iterable of :class:`~pyformlang.fcfg.FeatureStructure` The paths """ res = [] for feature, content in self._content.items(): paths = content.get_all_paths() for path in paths: res.append([feature] + path) if not res: res.append([]) return res
def __repr__(self): res = [] for path in self.get_all_paths(): if path: feature = self.get_feature_by_path(path) value = feature.value if value is None: value = id(feature) res.append(".".join(path) + "=" + str(value)) return " | ".join(res)
[docs] @classmethod def from_text(cls, text: str, structure_variables: Dict[str, "FeatureStructure"] = None): """ Construct a feature structure from a text. Parameters ----------- text : str The text to parse structure_variables : dict of (str, :class:`~pyformlang.fcfg.FeatureStructure`), optional Existing structure variables. Returns -------- feature_structure : :class:`~pyformlang.fcfg.FeatureStructure` The parsed feature structure """ if structure_variables is None: structure_variables = {} preprocessed_conditions = _preprocess_conditions(text) return _create_feature_structure(preprocessed_conditions, structure_variables)
def _find_closing_bracket(condition, start, opening="[", closing="]"): counter = 0 pos = start for current_char in condition[start:]: if current_char == opening: counter += 1 elif current_char == closing: counter -= 1 if counter == 0: return pos pos += 1 return -1 class ParsingException(Exception): """When there is a problem during parsing.""" def _preprocess_conditions(conditions, start=0, end=-1): conditions = conditions.replace("->", "=") conditions = conditions.strip() res = [] reading_feature = True current_feature = "" current_value = "" reference = None pos = start end = len(conditions) if end == -1 else end while pos < end: current = conditions[pos] if current == "=": reading_feature = False pos += 1 elif reading_feature: current_feature += current pos += 1 elif current == "[": end_bracket = _find_closing_bracket(conditions, pos) if end_bracket == -1: raise ParsingException() current_value = _preprocess_conditions(conditions, pos + 1, end_bracket) pos = end_bracket + 1 elif current == "(": end_bracket = _find_closing_bracket(conditions, pos, "(", ")") if end_bracket == -1: raise ParsingException() reference = conditions[pos+1: end_bracket] pos = end_bracket + 1 elif current == ",": reading_feature = True if isinstance(current_value, str): current_value = current_value.strip() res.append((current_feature.strip(), current_value, reference)) current_feature = "" current_value = "" reference = None pos += 1 else: current_value += current pos += 1 if current_feature.strip(): if isinstance(current_value, str): current_value = current_value.strip() res.append((current_feature.strip(), current_value, reference)) return res def _create_feature_structure(conditions, structure_variables, existing_references=None, feature_structure=None): if existing_references is None: existing_references = {} if feature_structure is None: feature_structure = FeatureStructure() for feature, value, reference in conditions: if reference is not None: if reference not in existing_references: existing_references[reference] = FeatureStructure() new_fs = existing_references[reference] else: new_fs = FeatureStructure() if value and isinstance(value, str): if value[0] != "?": new_fs.value = value feature_structure.add_content(feature, new_fs) elif value[1:] in structure_variables: new_fs.pointer = structure_variables[value[1:]] feature_structure.add_content(feature, new_fs) else: feature_structure.add_content(feature, new_fs) structure_variables[value[1:]] = new_fs elif not isinstance(value, str): structure = _create_feature_structure(value, structure_variables, existing_references, new_fs) feature_structure.add_content(feature, structure) else: feature_structure.add_content(feature, new_fs) return feature_structure