Nlp.py

Allikas: Lambda
# nlp.py
#
# this is a trivial English-to-reasoner-input converter.
#
# Steps in parsing:
# split the text into sentences and then sentences to words (tokens)
# then parse the .-ending and ?-ending sentences separately.
# Sentences are assumed to be either with the fixed structure like
#
# John is a father of Andrew.
#
#   where we look for word followed by "of" as predicate
#   and assume the first and last words are arguments, hence
#   we build
#   ["cfather","cjohn","candrew"]
#   and we convert to reasoner line
#   cfather(cJohn,cAndrew).
#   where the "c" is for avoiding capitals to 
#   thought of as variables.
#
# or 
#
# Andrew is a man.
#   where we check that it contains no "of"
#   and assume the last word is the single-arg predicate
#   and the firs is its arg
#   we build
#   ["cman","candrew"]
#   and we convert to reasoner line
#   cman(cAndrew).
#
# or questions like
# 
# Who is the father of Andrew?
#
#   where we check if the first word is "who" or "what"
#   and assume the last word is the second argument
#   we build
#   [["-cfather","X1","candrew"],["ans","X1"]]
#   and we convert to the reasoner line
#   -cfather(X1,cAndrew) | ans(X1).


intext="John is a father of Andrew. Andrew is a man. Who is the father of Andrew?"

def main(txt):
  tmp=parse(txt)
  reasonertext=""
  for tokenlst in tmp:
    sentparsed=parse_sentence(tokenlst)
    #print("sentparsed in main",sentparsed)
    reasonerline=make_reasoner_line(sentparsed)
    reasonertext+=reasonerline+"\n"
  print(reasonertext)
    
    
def parse(txt):
  tmp=txt.replace("."," . ")
  tmp=tmp.replace("?"," ? ")
  tmp=tmp.replace("!"," ! ")
  tokens=tmp.split()
  sentences=[]
  sent=[]
  for token in tokens:    
    if token in [".","?","!"]:
      sent.append(token)
      sentences.append(sent)
      sent=[]
    else:
      sent.append(token)      
  return sentences

def parse_sentence(tokenlst):
  if tokenlst[-1]=="?":
    return parse_question_sentence(tokenlst[:-1])
  else:
    return parse_fact_sentence(tokenlst[:-1])

def parse_fact_sentence(tokenlst):
  #print("tokenlst for parse_fact_sentence",tokenlst)
  if not("of" in tokenlst):
    return parse_type_fact_sentence(tokenlst)
  of_loc=tokenlst.index("of")
  pred=makeconst(tokenlst[of_loc-1]) 
  arg1=makeconst(tokenlst[0])
  arglast=makeconst(tokenlst[-1])
  return [pred,arg1,arglast]
  
def parse_type_fact_sentence(tokenlst):
  #print("tokenlst for parse_type_fact_sentence",tokenlst)
  if "of" in tokenlst: return None  
  pred=makeconst(tokenlst[-1]) 
  arg1=makeconst(tokenlst[0])
  return [pred,arg1]  

def parse_question_sentence(tokenlst):
  #print("tokenlst for parse_question_sentence",tokenlst)
  of_loc=tokenlst.index("of")
  if not of_loc: return None
  pred=makeconst(tokenlst[int(of_loc)-1]) 
  arg1="X1" 
  arglast=makeconst(tokenlst[-1])
  return [["-"+pred,arg1,arglast],["ans","X1"]]
  
def make_reasoner_line(lst):  
  #print("make_reasoner_line input",lst)  
  if not lst: return ""
  if type(lst[0])==list:
    # rule
    s=""
    for part in lst:
      if s: s+=" | "
      s+=make_reasoner_atom(part)     
    return s+"."
  else:
    # fact
    return make_reasoner_atom(lst)+"."
  
def make_reasoner_atom(lst):
  if not lst: return ""
  s=""
  for el in lst[1:]:    
    if s: s+=","
    s+=el
  s=lst[0]+"("+s+")"  
  return s
  
def makeconst(s):
  return "c"+s

  
main(intext)