""" We represent here a push-down automaton """
import json
from typing import AbstractSet, List, Iterable, Any
from itertools import product
import numpy as np
import networkx as nx
from networkx.drawing.nx_pydot import write_dot
from pyformlang.pda.cfg_variable_converter import CFGVariableConverter
from pyformlang import finite_automaton
from pyformlang import regular_expression
from pyformlang import cfg
from .state import State
from .symbol import Symbol
from .stack_symbol import StackSymbol
from .epsilon import Epsilon
from .transition_function import TransitionFunction
from .utils import PDAObjectCreator
from ..finite_automaton import FiniteAutomaton
from ..finite_automaton.finite_automaton import add_start_state_to_graph
INPUT_SYMBOL = 1
STACK_FROM = 2
INPUT = 0
STATE = 0
NEW_STACK = 1
OUTPUT = 1
[docs]class PDA:
""" Representation of a pushdown automaton
Parameters
----------
states : set of :class:`~pyformlang.pda.State`, optional
A finite set of states
input_symbols : set of :class:`~pyformlang.pda.Symbol`, optional
A finite set of input symbols
stack_alphabet : set of :class:`~pyformlang.pda.StackSymbol`, optional
A finite stack alphabet
transition_function : :class:`~pyformlang.pda.TransitionFunction`, optional
Takes as arguments a state, an input symbol and a stack symbol and
returns a state and a string of stack symbols push on the stacked to
replace X
start_state : :class:`~pyformlang.pda.State`, optional
A start state, element of states
start_stack_symbol : :class:`~pyformlang.pda.StackSymbol`, optional
The stack is initialized with this stack symbol
final_states : set of :class:`~pyformlang.pda.State`, optional
A set of final or accepting states. It is a subset of states.
"""
# pylint: disable=too-many-instance-attributes
def __init__(self,
states: AbstractSet[State] = None,
input_symbols: AbstractSet[Symbol] = None,
stack_alphabet: AbstractSet[StackSymbol] = None,
transition_function: TransitionFunction = None,
start_state: State = None,
start_stack_symbol: StackSymbol = None,
final_states: AbstractSet[State] = None):
# pylint: disable=too-many-arguments
self._pda_obj_creator = PDAObjectCreator()
if states is not None:
states = {self._pda_obj_creator.to_state(x) for x in states}
if input_symbols is not None:
input_symbols = {self._pda_obj_creator.to_symbol(x)
for x in input_symbols}
if stack_alphabet is not None:
stack_alphabet = {self._pda_obj_creator.to_stack_symbol(x)
for x in stack_alphabet}
if start_state is not None:
start_state = self._pda_obj_creator.to_state(start_state)
if start_stack_symbol is not None:
start_stack_symbol = \
self._pda_obj_creator.to_stack_symbol(start_stack_symbol)
if final_states is not None:
final_states = {self._pda_obj_creator.to_state(x)
for x in final_states}
self._states = states or set()
self._states = set(self._states)
self._input_symbols = input_symbols or set()
self._input_symbols = set(self._input_symbols)
self._stack_alphabet = stack_alphabet or set()
self._stack_alphabet = set(self._stack_alphabet)
self._transition_function = transition_function or TransitionFunction()
self._start_state = start_state
if start_state is not None:
self._states.add(start_state)
self._start_stack_symbol = start_stack_symbol
if start_stack_symbol is not None:
self._stack_alphabet.add(start_stack_symbol)
self._final_states = final_states or set()
self._final_states = set(self._final_states)
for state in self._final_states:
self._states.add(state)
self._cfg_variable_converter = None
[docs] def set_start_state(self, start_state: State):
""" Sets the start state to the automaton
Parameters
----------
start_state : :class:`~pyformlang.pda.State`
The start state
"""
start_state = self._pda_obj_creator.to_state(start_state)
self._states.add(start_state)
self._start_state = start_state
[docs] def set_start_stack_symbol(self, start_stack_symbol: StackSymbol):
""" Sets the start stack symbol to the automaton
Parameters
----------
start_stack_symbol : :class:`~pyformlang.pda.StackSymbol`
The start stack symbol
"""
start_stack_symbol = self._pda_obj_creator.to_stack_symbol(
start_stack_symbol)
self._stack_alphabet.add(start_stack_symbol)
self._start_stack_symbol = start_stack_symbol
[docs] def add_final_state(self, state: State):
""" Adds a final state to the automaton
Parameters
----------
state : :class:`~pyformlang.pda.State`
The state to add
"""
state = self._pda_obj_creator.to_state(state)
self._final_states.add(state)
@property
def start_state(self):
""" Get start state """
return self._start_state
@property
def states(self):
"""
Get the states fo the PDA
Returns
-------
states : iterable of :class:`~pyformlang.pda.State`
The states of the PDA
"""
return self._states
@property
def final_states(self):
"""
The final states of the PDA
Returns
-------
final_states : iterable of :class:`~pyformlang.pda.State`
The final states of the PDA
"""
return self._final_states
@property
def input_symbols(self):
"""
The input symbols of the PDA
Returns
-------
input_symbols : iterable of :class:`~pyformlang.pda.Symbol`
The input symbols of the PDA
"""
return self._input_symbols
@property
def stack_symbols(self):
"""
The stack symbols of the PDA
Returns
-------
stack_symbols : iterable of :class:`~pyformlang.pda.StackSymbol`
The stack symbols of the PDA
"""
return self._stack_alphabet
[docs] def get_number_transitions(self) -> int:
""" Gets the number of transitions in the PDA
Returns
----------
n_transitions : int
The number of transitions
"""
return self._transition_function.get_number_transitions()
[docs] def add_transitions(self, transitions):
"""
Adds several transitions
Parameters
----------
transitions :
Transitions as they would be given to add_transition
"""
for s_from, input_symbol, stack_from, s_to, stack_to in transitions:
self.add_transition(s_from, input_symbol, stack_from,
s_to, stack_to)
[docs] def add_transition(self,
s_from: State,
input_symbol: Symbol,
stack_from: StackSymbol,
s_to: State,
stack_to: List[StackSymbol]):
""" Add a transition to the PDA
Parameters
----------
s_from : :class:`~pyformlang.pda.State`
The starting symbol
input_symbol : :class:`~pyformlang.pda.Symbol`
The input symbol for the transition
stack_from : :class:`~pyformlang.pda.StackSymbol`
The stack symbol of the transition
s_to : :class:`~pyformlang.pda.State`
The new state
stack_to : list of :class:`~pyformlang.pda.StackSymbol`
The string of stack symbol which replace the stack_from
"""
# pylint: disable=too-many-arguments
s_from = self._pda_obj_creator.to_state(s_from)
input_symbol = self._pda_obj_creator.to_symbol(input_symbol)
stack_from = self._pda_obj_creator.to_stack_symbol(stack_from)
s_to = self._pda_obj_creator.to_state(s_to)
stack_to = [self._pda_obj_creator.to_stack_symbol(x) for x in stack_to]
self._states.add(s_from)
self._states.add(s_to)
if input_symbol != Epsilon():
self._input_symbols.add(input_symbol)
self._stack_alphabet.add(stack_from)
for stack_symbol in stack_to:
if stack_symbol != Epsilon():
self._stack_alphabet.add(stack_symbol)
self._transition_function.add_transition(s_from,
input_symbol,
stack_from,
s_to,
stack_to)
[docs] def to_final_state(self) -> "PDA":
""" Turns the current PDA that accepts a language L by empty stack \
to another PDA that accepts the same language L by final state
Returns
----------
new_pda : :class:`~pyformlang.pda.PDA`
The new PDA which accepts by final state the language that \
was accepted by empty stack
"""
new_start = get_next_free("#STARTTOFINAL#", State, self._states)
new_end = get_next_free("#ENDTOFINAL#", State, self._states)
new_stack_symbol = get_next_free("#BOTTOMTOFINAL#",
StackSymbol,
self._stack_alphabet)
new_states = self._states.copy()
new_states.add(new_start)
new_states.add(new_end)
new_stack_alphabet = self._stack_alphabet.copy()
new_stack_alphabet.add(new_stack_symbol)
new_tf = self._transition_function.copy()
new_tf.add_transition(new_start, Epsilon(), new_stack_symbol,
self._start_state, [self._start_stack_symbol,
new_stack_symbol])
for state in self._states:
new_tf.add_transition(state, Epsilon(), new_stack_symbol,
new_end, [])
return PDA(new_states,
self._input_symbols.copy(),
new_stack_alphabet,
new_tf,
new_start,
new_stack_symbol,
{new_end})
[docs] def to_empty_stack(self) -> "PDA":
""" Turns the current PDA that accepts a language L by final state to \
another PDA that accepts the same language L by empty stack
Returns
----------
new_pda : :class:`~pyformlang.pda.PDA`
The new PDA which accepts by empty stack the language that was \
accepted by final state
"""
new_start = get_next_free("#STARTEMPTYS#", State, self._states)
new_end = get_next_free("#ENDEMPTYS#", State, self._states)
new_stack_symbol = get_next_free("#BOTTOMEMPTYS#",
StackSymbol,
self._stack_alphabet)
new_states = self._states.copy()
new_states.add(new_start)
new_states.add(new_end)
new_stack_alphabet = self._stack_alphabet.copy()
new_stack_alphabet.add(new_stack_symbol)
new_tf = self._transition_function.copy()
new_tf.add_transition(new_start, Epsilon(), new_stack_symbol,
self._start_state, [self._start_stack_symbol,
new_stack_symbol])
for state in self._final_states:
for stack_symbol in new_stack_alphabet:
new_tf.add_transition(state, Epsilon(), stack_symbol,
new_end, [])
for stack_symbol in new_stack_alphabet:
new_tf.add_transition(new_end, Epsilon(), stack_symbol,
new_end, [])
return PDA(new_states,
self._input_symbols.copy(),
new_stack_alphabet,
new_tf,
new_start,
new_stack_symbol)
[docs] def to_cfg(self) -> "cfg.CFG":
""" Turns the language L generated by this PDA when accepting \
on empty \
stack into a CFG that accepts the same language L
Returns
----------
new_cfg : :class:`~pyformlang.cfg.CFG`
The equivalent CFG
"""
self._cfg_variable_converter = \
CFGVariableConverter(self._states, self._stack_alphabet)
start = cfg.Variable("#StartCFG#")
productions = self._initialize_production_from_start_in_to_cfg(start)
states = self._states
for transition in self._transition_function:
for state in states:
self._cfg_variable_converter.set_valid(
transition[INPUT][STATE],
transition[INPUT][STACK_FROM],
state)
for transition in self._transition_function:
for state in states:
self._process_transition_and_state_to_cfg(productions,
state,
transition)
return cfg.CFG(start_symbol=start, productions=productions)
def _process_transition_and_state_to_cfg(self,
productions,
state,
transition):
current_state_has_empty_new_stack = \
len(transition[OUTPUT][NEW_STACK]) == 0 and \
state != transition[OUTPUT][STATE]
if not current_state_has_empty_new_stack:
self._process_transition_and_state_to_cfg_safe(productions, state,
transition)
def _process_transition_and_state_to_cfg_safe(self, productions, state,
transition):
head = self._get_head_from_state_and_transition(state, transition)
bodies = self._get_all_bodies_from_state_and_transition(state,
transition)
if transition[INPUT][INPUT_SYMBOL] != Epsilon():
_prepend_input_symbol_to_the_bodies(bodies, transition)
for body in bodies:
productions.append(cfg.Production(head, body, filtering=False))
def _get_all_bodies_from_state_and_transition(self, state, transition):
return self._generate_all_rules(transition[OUTPUT][STATE],
state,
transition[OUTPUT][NEW_STACK])
def _generate_all_rules(self, s_from: State, s_to: State,
ss_by: List[StackSymbol]) \
-> Iterable[Iterable["cfg.Variable"]]:
""" Generates the rules in the CFG conversion """
if not ss_by:
return [[]]
if len(ss_by) == 1:
return self._generate_length_one_rules(s_from, s_to, ss_by)
res = []
is_valid_and_get = self._cfg_variable_converter.is_valid_and_get
append_to_res = res.append
length_ss_by_minus_one = len(ss_by) - 1
for states in product(self._states, repeat=length_ss_by_minus_one):
last_one = s_from
temp = []
stopped = False
for i in range(length_ss_by_minus_one):
new_variable = is_valid_and_get(last_one,
ss_by[i],
states[i])
if new_variable is None:
stopped = True
break
temp.append(new_variable)
last_one = states[i]
if stopped:
continue
new_variable = is_valid_and_get(last_one, ss_by[-1], s_to)
if new_variable is None:
continue
temp.append(new_variable)
append_to_res(temp)
return res
def _generate_length_one_rules(self, s_from, s_to, ss_by):
state = self._cfg_variable_converter.is_valid_and_get(s_from, ss_by[0],
s_to)
if state is not None:
return [[state]]
return []
def _get_head_from_state_and_transition(self, state, transition):
return self._cfg_variable_converter.to_cfg_combined_variable(
transition[INPUT][STATE],
transition[INPUT][STACK_FROM],
state)
def _initialize_production_from_start_in_to_cfg(self, start):
productions = []
for state in self._states:
productions.append(
cfg.Production(
start,
[self._cfg_variable_converter.to_cfg_combined_variable(
self._start_state,
self._start_stack_symbol,
state)]))
return productions
[docs] def intersection(self, other: Any) -> "PDA":
""" Gets the intersection of the language L generated by the \
current PDA when accepting by final state with something else
Currently, it only works for regular languages (represented as \
regular expressions or finite automata) as the intersection \
between two PDAs is not context-free (it cannot be represented \
with a PDA).
Equivalent to:
>> pda and regex
Parameters
----------
other : any
The other part of the intersection
Returns
----------
new_pda : :class:`~pyformlang.pda.PDA`
The pda resulting of the intersection
Raises
----------
NotImplementedError
When intersecting with something else than a regex or a finite
automaton
"""
if isinstance(other, regular_expression.Regex):
enfa = other.to_epsilon_nfa()
other = enfa.to_deterministic()
elif isinstance(other, FiniteAutomaton):
is_deterministic = other.is_deterministic()
if not is_deterministic:
other = other.to_deterministic()
else:
raise NotImplementedError
start_state_other = other.start_states
if len(start_state_other) == 0:
return PDA()
pda_state_converter = _PDAStateConverter(self._states, other.states)
start_state_other = list(start_state_other)[0]
final_state_other = other.final_states
start = pda_state_converter.to_pda_combined_state(self._start_state,
start_state_other)
pda = PDA(start_state=start,
start_stack_symbol=self._start_stack_symbol)
symbols = self._input_symbols.copy()
symbols.add(Epsilon())
to_process = [(self._start_state, start_state_other)]
processed = {(self._start_state, start_state_other)}
while to_process:
state_in, state_dfa = to_process.pop()
if (state_in in self._final_states and state_dfa in
final_state_other):
pda.add_final_state(
pda_state_converter.to_pda_combined_state(state_in,
state_dfa))
for symbol in symbols:
if symbol == Epsilon():
symbol_dfa = finite_automaton.Epsilon()
else:
symbol_dfa = finite_automaton.Symbol(symbol.value)
if symbol == Epsilon():
next_states_dfa = [state_dfa]
else:
next_states_dfa = other(state_dfa, symbol_dfa)
if len(next_states_dfa) == 0:
continue
for stack_symbol in self._stack_alphabet:
next_states_self = self._transition_function(state_in,
symbol,
stack_symbol)
for next_state, next_stack in next_states_self:
for next_state_dfa in next_states_dfa:
pda.add_transition(
pda_state_converter.to_pda_combined_state(
state_in,
state_dfa),
symbol,
stack_symbol,
pda_state_converter.to_pda_combined_state(
next_state,
next_state_dfa),
next_stack)
if (next_state, next_state_dfa) not in processed:
to_process.append((next_state, next_state_dfa))
processed.add((next_state, next_state_dfa))
return pda
def __and__(self, other):
""" Gets the intersection of the current PDA with something else
Equivalent to:
>> pda and regex
Parameters
----------
other : any
The other part of the intersection
Returns
----------
new_pda : :class:`~pyformlang.pda.PDA`
The pda resulting of the intersection
Raises
----------
NotImplementedError
When intersecting with something else than a regex or a finite
automaton
"""
return self.intersection(other)
[docs] def to_dict(self):
"""
Get the transitions of the PDA as a dictionary
Returns
-------
transitions : dict
The transitions
"""
return self._transition_function.to_dict()
[docs] def to_networkx(self) -> nx.MultiDiGraph:
"""
Transform the current pda into a networkx graph
Returns
-------
graph : networkx.MultiDiGraph
A networkx MultiDiGraph representing the pda
"""
graph = nx.MultiDiGraph()
for state in self._states:
graph.add_node(state.value,
is_start=state == self._start_state,
is_final=state in self.final_states,
peripheries=2 if state in self.final_states else 1,
label=state.value)
if state == self._start_state:
add_start_state_to_graph(graph, state)
if self._start_stack_symbol is not None:
graph.add_node("INITIAL_STACK_HIDDEN",
label=json.dumps(self._start_stack_symbol.value),
shape=None,
height=.0,
width=.0)
for key, value in self._transition_function:
s_from, in_symbol, stack_from = key
s_to, stack_to = value
graph.add_edge(
s_from.value,
s_to.value,
label=(json.dumps(in_symbol.value) + " -> " +
json.dumps(stack_from.value) + " / " +
json.dumps([x.value for x in stack_to])))
return graph
[docs] @classmethod
def from_networkx(cls, graph):
"""
Import a networkx graph into a PDA. \
The imported graph requires to have the good format, i.e. to come \
from the function to_networkx
Parameters
----------
graph :
The graph representation of the PDA
Returns
-------
pda :
A PDA automaton read from the graph
TODO
-------
* Explain the format
"""
pda = PDA()
for s_from in graph:
if isinstance(s_from, str) and s_from.startswith("starting_"):
continue
for s_to in graph[s_from]:
for transition in graph[s_from][s_to].values():
if "label" in transition:
in_symbol, stack_info = transition["label"].split(
" -> ")
in_symbol = json.loads(in_symbol)
stack_from, stack_to = stack_info.split(" / ")
stack_from = json.loads(stack_from)
stack_to = json.loads(stack_to)
pda.add_transition(s_from,
in_symbol,
stack_from,
s_to,
stack_to)
for node in graph.nodes:
if graph.nodes[node].get("is_start", False):
pda.set_start_state(node)
if graph.nodes[node].get("is_final", False):
pda.add_final_state(node)
if "INITIAL_STACK_HIDDEN" in graph.nodes:
pda.set_start_stack_symbol(
json.loads(graph.nodes["INITIAL_STACK_HIDDEN"]["label"]))
return pda
[docs] def write_as_dot(self, filename):
"""
Write the PDA in dot format into a file
Parameters
----------
filename : str
The filename where to write the dot file
"""
write_dot(self.to_networkx(), filename)
def _prepend_input_symbol_to_the_bodies(bodies, transition):
to_prepend = cfg.Terminal(transition[INPUT][INPUT_SYMBOL].value)
for body in bodies:
body.insert(0, to_prepend)
class _PDAStateConverter:
# pylint: disable=too-few-public-methods
def __init__(self, states_pda, states_dfa):
self._inverse_state_pda = {}
for i, state in enumerate(states_pda):
self._inverse_state_pda[state] = i
self._inverse_state_dfa = {}
for i, state in enumerate(states_dfa):
self._inverse_state_dfa[state] = i
self._conversions = np.empty((len(states_pda), len(states_dfa)),
dtype=object)
def to_pda_combined_state(self, state_pda, state_other):
""" To PDA state in the intersection function """
i_state_pda = self._inverse_state_pda[state_pda]
i_state_other = self._inverse_state_dfa[state_other]
if self._conversions[i_state_pda, i_state_other] is None:
self._conversions[i_state_pda, i_state_other] = State(
(state_pda, state_other))
return self._conversions[i_state_pda, i_state_other]
def get_next_free(prefix, type_generating, to_check):
""" Get free next state or symbol """
idx = 0
new_var = type_generating(prefix)
while new_var in to_check:
new_var = type_generating(prefix + str(idx))
idx += 1
return new_var