# Nlp.py
# Source: Lambda
# nlp.py
#
# this is a trivial English-to-reasoner-input converter.
#
# Steps in parsing:
# split the text into sentences and then sentences to words (tokens)
# then parse the .-ending and ?-ending sentences separately.
# Sentences are assumed to be either with the fixed structure like
#
# John is a father of Andrew.
#
# where we look for word followed by "of" as predicate
# and assume the first and last words are arguments, hence
# we build
# ["cfather","cjohn","candrew"]
# and we convert to reasoner line
# cfather(cJohn,cAndrew).
# where the "c" is for avoiding capitals to
# thought of as variables.
#
# or
#
# Andrew is a man.
# where we check that it contains no "of"
# and assume the last word is the single-arg predicate
# and the firs is its arg
# we build
# ["cman","candrew"]
# and we convert to reasoner line
# cman(cAndrew).
#
# or questions like
#
# Who is the father of Andrew?
#
# where we check if the first word is "who" or "what"
# and assume the last word is the second argument
# we build
# [["-cfather","X1","candrew"],["ans","X1"]]
# and we convert to the reasoner line
# -cfather(X1,cAndrew) | ans(X1).
# Demo input: two '.'-terminated facts and one '?'-terminated question,
# fed to main() at the bottom of the file.
intext="John is a father of Andrew. Andrew is a man. Who is the father of Andrew?"
def main(txt):
    """Convert English text *txt* to reasoner syntax and print the result.

    Each sentence becomes one reasoner line; lines are printed as a single
    newline-terminated block.
    """
    lines = []
    for tokens in parse(txt):
        parsed = parse_sentence(tokens)
        #print("sentparsed in main", parsed)
        lines.append(make_reasoner_line(parsed))
    print("".join(line + "\n" for line in lines))
def parse(txt):
    """Split raw text into sentences of tokens.

    Returns a list of sentences; each sentence is a list of word tokens
    ending with its terminator token ('.', '?' or '!').  Trailing words
    with no terminator are dropped.
    """
    # Pad every terminator with spaces so it splits out as its own token.
    spaced = txt
    for mark in (".", "?", "!"):
        spaced = spaced.replace(mark, " " + mark + " ")

    sentences = []
    current = []
    for tok in spaced.split():
        current.append(tok)
        if tok in (".", "?", "!"):
            # Terminator closes the current sentence.
            sentences.append(current)
            current = []
    return sentences
def parse_sentence(tokenlst):
    """Dispatch one tokenized sentence to the question or fact parser.

    The trailing terminator token is stripped before dispatch.
    """
    body = tokenlst[:-1]  # drop '.', '?' or '!'
    if tokenlst[-1] == "?":
        return parse_question_sentence(body)
    return parse_fact_sentence(body)
def parse_fact_sentence(tokenlst):
    """Parse a '.'-terminated fact (terminator already stripped).

    'X is a P of Y' -> [P, X, Y]: the word before 'of' is the binary
    predicate, the first and last words are its arguments.  Sentences
    without 'of' are delegated to parse_type_fact_sentence.
    """
    if "of" not in tokenlst:
        return parse_type_fact_sentence(tokenlst)
    idx = tokenlst.index("of")
    # Predicate is the word immediately preceding "of".
    return [makeconst(tokenlst[idx - 1]),
            makeconst(tokenlst[0]),
            makeconst(tokenlst[-1])]
def parse_type_fact_sentence(tokenlst):
    """Parse an 'X is a P.' type fact (no 'of'; terminator stripped).

    The last word is the unary predicate, the first word its argument:
    ['Andrew','is','a','man'] -> ['cman', 'cAndrew'].  Returns None if
    the sentence unexpectedly contains 'of'.
    """
    if "of" in tokenlst:
        return None
    # The "c" prefix keeps capitalized words from being read as variables
    # by the reasoner (same convention as makeconst).
    return ["c" + tokenlst[-1], "c" + tokenlst[0]]
def parse_question_sentence(tokenlst):
    """Parse a '?'-terminated question (terminator already stripped).

    'Who is the P of Y' -> [['-cP', 'X1', 'cY'], ['ans', 'X1']], i.e. a
    negated query atom disjoined with an answer-capturing literal.
    Returns None when the question does not match the expected
    '... P of Y' shape (no 'of', or 'of' as the first word).

    Fix: the original called tokenlst.index("of") unguarded, raising
    ValueError on any question without 'of' (e.g. 'Who is a man?').
    """
    #print("tokenlst for parse_question_sentence",tokenlst)
    if "of" not in tokenlst:
        return None
    of_loc = tokenlst.index("of")
    if not of_loc:
        # "of" is the first word: no predicate word precedes it.
        return None
    # "c" prefix keeps capitalized words from being read as variables.
    pred = "c" + tokenlst[of_loc - 1]
    arglast = "c" + tokenlst[-1]
    return [["-" + pred, "X1", arglast], ["ans", "X1"]]
def make_reasoner_line(lst):
    """Render a parsed sentence as one reasoner clause string.

    A flat list [pred, arg, ...] is a fact -> 'pred(arg,...).'.
    A list of lists is a disjunctive rule -> 'atom1 | atom2.'.
    Falsy input (None, []) yields ''.
    """
    def render_atom(atom):
        # Same contract as make_reasoner_atom: [pred, a1, a2] -> 'pred(a1,a2)'.
        if not atom:
            return ""
        return atom[0] + "(" + ",".join(atom[1:]) + ")"

    #print("make_reasoner_line input",lst)
    if not lst:
        return ""
    if isinstance(lst[0], list):
        # Rule: join atoms with the disjunction bar.
        out = ""
        for atom in lst:
            if out:
                out += " | "
            out += render_atom(atom)
        return out + "."
    # Fact: a single atom.
    return render_atom(lst) + "."
def make_reasoner_atom(lst):
    """Format [pred, arg1, ...] as 'pred(arg1,...)'; '' for falsy input."""
    if not lst:
        return ""
    args = ",".join(lst[1:])
    return lst[0] + "(" + args + ")"
def makeconst(s):
    """Prefix *s* with 'c' so capitalized words are not read as variables."""
    return f"c{s}"
if __name__ == "__main__":
    # Guard the demo run so importing this module does not execute it.
    main(intext)