Nlp.py
Source: Lambda
# nlp.py
#
# A trivial English-to-reasoner-input converter.
#
# Parsing steps:
#   * split the text into sentences, and sentences into word tokens;
#   * parse "."-ending (fact) and "?"-ending (question) sentences separately.
#
# Supported sentence shapes:
#
#   John is a father of Andrew.
#     The word before "of" is the predicate; the first and last words are
#     the arguments:  ["cfather", "cJohn", "cAndrew"]  ->  cfather(cJohn,cAndrew).
#     The "c" prefix keeps capitalized names from being read as variables.
#
#   Andrew is a man.
#     No "of": the last word is a one-argument predicate, the first word
#     its argument:  ["cman", "cAndrew"]  ->  cman(cAndrew).
#
#   Who is the father of Andrew?
#     First word "Who"/"What": the last word is the second argument, X1 the
#     unknown first argument:
#       [["-cfather", "X1", "cAndrew"], ["ans", "X1"]]
#       ->  -cfather(X1,cAndrew) | ans(X1).

intext = "John is a father of Andrew. Andrew is a man. Who is the father of Andrew?"


def main(txt):
    """Convert *txt* to reasoner syntax and print the result.

    One reasoner line is emitted per sentence, each followed by a newline.
    """
    reasonertext = "".join(
        make_reasoner_line(parse_sentence(tokenlst)) + "\n"
        for tokenlst in parse(txt)
    )
    print(reasonertext)


def parse(txt):
    """Split *txt* into a list of sentences, each a list of word tokens.

    The terminator (".", "?" or "!") is kept as the final token of each
    sentence.  Trailing text without a terminator is dropped, matching the
    sentence-per-terminator contract.
    """
    # Pad terminators with spaces so split() isolates them as tokens.
    for punct in (".", "?", "!"):
        txt = txt.replace(punct, " " + punct + " ")
    sentences = []
    sent = []
    for token in txt.split():
        sent.append(token)
        if token in (".", "?", "!"):
            sentences.append(sent)
            sent = []
    return sentences


def parse_sentence(tokenlst):
    """Dispatch one tokenized sentence to the question or fact parser.

    The trailing terminator token is stripped before dispatch.
    """
    if tokenlst[-1] == "?":
        return parse_question_sentence(tokenlst[:-1])
    return parse_fact_sentence(tokenlst[:-1])


def parse_fact_sentence(tokenlst):
    """Parse a fact sentence (no trailing terminator) into a flat atom list.

    With "of" present: [pred, first-word-arg, last-word-arg].
    Without "of": delegates to parse_type_fact_sentence.
    """
    if "of" not in tokenlst:
        return parse_type_fact_sentence(tokenlst)
    of_loc = tokenlst.index("of")
    pred = makeconst(tokenlst[of_loc - 1])  # word just before "of"
    arg1 = makeconst(tokenlst[0])
    arglast = makeconst(tokenlst[-1])
    return [pred, arg1, arglast]


def parse_type_fact_sentence(tokenlst):
    """Parse an "of"-less fact like "Andrew is a man" into [pred, arg].

    Returns None if the sentence unexpectedly contains "of".
    """
    if "of" in tokenlst:
        return None
    pred = makeconst(tokenlst[-1])
    arg1 = makeconst(tokenlst[0])
    return [pred, arg1]


def parse_question_sentence(tokenlst):
    """Parse a question like "Who is the father of Andrew" into a rule.

    Returns [["-pred", "X1", arg], ["ans", "X1"]], or None when the
    question lacks a usable "of" structure.
    """
    # Bug fix: the original called tokenlst.index("of") unconditionally,
    # raising ValueError on questions without "of"; return None instead.
    if "of" not in tokenlst:
        return None
    of_loc = tokenlst.index("of")
    if not of_loc:
        return None  # "of" first in the sentence: no predicate word before it
    pred = makeconst(tokenlst[of_loc - 1])
    arg1 = "X1"
    arglast = makeconst(tokenlst[-1])
    return [["-" + pred, arg1, arglast], ["ans", "X1"]]


def make_reasoner_line(lst):
    """Render a parsed sentence as one reasoner line ending with ".".

    A list of lists is a rule (atoms joined by " | "); a flat list is a
    single fact atom.  Empty/None input renders as "".
    """
    if not lst:
        return ""
    if isinstance(lst[0], list):  # rule: disjunction of atoms
        return " | ".join(make_reasoner_atom(part) for part in lst) + "."
    return make_reasoner_atom(lst) + "."  # fact: a single atom


def make_reasoner_atom(lst):
    """Render [pred, arg1, ...] as "pred(arg1,...)"; "" for empty input."""
    if not lst:
        return ""
    return lst[0] + "(" + ",".join(lst[1:]) + ")"


def makeconst(s):
    """Prefix *s* with "c" so capitalized words aren't read as variables."""
    return "c" + s


if __name__ == "__main__":
    main(intext)