/*  GB.PL  */


%%%%%%%%%
%       "Gibberish" -- A GB'ish parser!
%
%   A first attempt at a Government-Binding (GB) type
%   parser.  This parser is intended as an introductory
%   'toy' parser for NLU courses, so complexity will be
%   kept to a minimum while still being general enough
%   to be easily extended.  All constraints have been
%   implemented as explicit prolog goals for
%   perspicuity.  Many optimizations are possible!  The
%   gapping mechanism actually performs a transformation
%   of the sentence into a 'normal' form and so has been
%   made general enough to move arbitrary structures
%   through the parse tree.
%
%   Author: Cameron Shelley
%   Address: cpshelley@violet.waterloo.edu
%        University of Waterloo
%
/*  GB.PL  */


%   Comments are welcome!
%
%   This software is released into the public domain on the
%   condition that the author is cited as such, and all
%   modifications remain in the public domain; and this
%   condition is imposed on all subsequent users.
%
%   Modification History:
%   ---------------------
%   Jan 17/91 - creation
%   Feb 4/91  - fixed modal(nil) matching bug in "modal-2".
%   Feb 4/91  - added general conjunction rule "conj".
%           (idea from Steve Green -- thanks Steve!)
%
%%%%%%%%%

%   parse: read the words of one sentence with read_sentence, parse
%   the complete word list as a 'sentence', and pass the resulting
%   structure to print_struct.  Fails if the words cannot be parsed.

parse :-
    read_sentence(Words),
    phrase(sentence(Tree),Words),
    print_struct(Tree).

%%%%%%%%%
%   'sentence' will parse the basic np,vp structure
%   at the top level.  Different sentence types will
%   be added (ie. questions).
%
%   sentence(
%       Struc  : return structure from sentence call
%       )
%
%%%%%%%%%

%
%   normal sentence
%

%   sentence-1: declarative sentence, returned as s(Np,Vp).  The
%   subject np and the vp share the same 'person' and 'number'
%   variables, so agreement is checked by unification.

sentence(s(Subject,Pred)) -->
    noun_phrase(Subject,Pers,Num,nogap,nogap),
    verb_phrase(Pred,Pers,Num,nogap).

%
%   question with modal transformed to initial position
%
%   sentence-2: returned as q(Np,Vp).  The structure returned by the
%   initial 'modal' is passed to the vp as its gap.

sentence(q(Subject,Pred)) -->
    modal(Fronted,_,_,_,_,nogap,_),
    noun_phrase(Subject,Pers,Num,nogap,nogap),
    verb_phrase(Pred,Pers,Num,Fronted).

%%%%%%%%%
%   'noun_phrase' will parse the various types of np's and
%   should subcategorize between np's and sbars at some point.
%   Also, proper nouns and pronouns can be treated as special
%   np's in this system.
%
%   noun_phrase(
%       Struc : return structure,
%       Pers  : np 'person' = first | second | third,
%       Nnum  : np 'number' = sing | plur,
%       Gap   : transformed np (if any),
%       Gapout: output gap if Gap not resolved
%   )
%
%%%%%%%%%

%   noun_phrase-1: consumes no input.  A gap whose principal functor
%   is 'np' is returned as the noun phrase and the output gap becomes
%   'nogap'.  'Pers' and 'Nnum' are not bound by this clause.

noun_phrase(Struc,_Pers,_Nnum,Gap,Gapout) -->
    [],
    {Gap =.. [np|_], Gapout = nogap, Struc = Gap}.

%   noun_phrase-2: determiner followed by noun_bar, which must agree
%   in number.  The np is always third person and the incoming gap is
%   passed out unchanged.  'conj' may attach a conjoined np and
%   decides the 'number' of the result.

noun_phrase(Struc,third,Num,Gap,Gap) -->
    determiner(Det,Nnum),
    noun_bar(Nbar,Nnum),
    conj(Struc,np(Det,Nbar),Nnum,Num,np).

%
%   determiner is the noun phrase specifier
%
%   determiner(
%       Struc : return structure,
%       Dnum  : det 'number' = sing | plur
%   )
%
%   No determiner is considered to pluralize the np, ie:
%   "cats go" but not *"cat goes".  The default could be
%   changed to "all" or "some" if desired.
%

%   determiner-1: a word found in 'lexdet', returned as det(Word)
%   with the number recorded in the lexicon.
determiner(det(Word),Dnum) -->
    [Word],
    {lexdet(Word,Dnum)}.

%   determiner-2: no determiner present; returned as det(nil) with
%   plural number.
determiner(det(nil),plur) -->
    [].

%   lexdet(Word,Number): "the" leaves the number open.
lexdet(the,_).
lexdet(a,sing).
lexdet(an,sing).

%
%   noun_bar is here just a noun with arguments.  A treatment
%   of adjectives should be added.
%
%   noun_bar(
%       Struc : return structure,
%       Nnum  : noun 'number' parsed = sing | plur
%       )
%

%   A noun followed by its (possibly empty) arguments, returned as
%   nbar(Noun,Args).  The number is that of the noun.

noun_bar(nbar(Head,Args),Nnum) -->
    noun(Head,Nnum),
    noun_args(Args).

%
%   mass nouns should be considered as noun_bars in this system!
%

%   noun-1: Word is the singular form (first column of 'lexnoun').
noun(noun(Word),sing) -->
    [Word],
    {lexnoun(Word,_)}.

%   noun-2: Word is the plural form (second column of 'lexnoun').
noun(noun(Word),plur) -->
    [Word],
    {lexnoun(_,Word)}.

%   lexnoun(Singular,Plural)
lexnoun(cat,cats).
lexnoun(dog,dogs).
lexnoun(stamp,stamps).
lexnoun(office,offices).
lexnoun(car,cars).
lexnoun(man,men).
lexnoun(chance,chances).
lexnoun(house,houses).
lexnoun(bar,bars).

%
%   noun_args here allows only pp's or nil's.  Handling of
%   embedded sentences can be added as suggested.
%
%   noun_args(
%       Struc : return structure
%       )
%

%   noun_args-1: a prepositional phrase, returned as n_args(Pp).

noun_args(n_args(Pp)) -->
    prep_phrase(Pp).

%noun_args(Struc) -->
%   sentence_bar(Sb),
%   {Struc = n_args(Sb)}.

%   noun_args-2: no arguments, returned as n_args(nil).

noun_args(n_args(nil)) -->
    [].

%%%%%%%%%
%   'verb_phrase' will parse off the predicate of a sentence.
%   Auxiliaries could be added as suggested.  Sensitivity to
%   tense would also be handy.
%
%   verb_phrase(
%       Struc : return structure,
%       Mpers : 'person' of subject input to modal,
%       Mnum  : 'number' of subject input to modal,
%       Mgap  : gap (if any) input to modal
%   )
%
%   'Xpers' and 'Xnum' represent constraints passed to the
%   verb phrase which may be altered by the components and
%   passed to the next component as 'Ypers' or 'Ynum', ie.
%   Mpers ==> Vpers.
%
%%%%%%%%%

%   A modal (possibly empty or gapped) followed by a verb_bar.  The
%   'person', 'number' and gap produced by 'modal' constrain the
%   verb_bar; the subject's own 'person' and 'number' are what a
%   conjoined vp is checked against.

verb_phrase(Struc,Mpers,Mnum,Mgap) -->
    modal(Modal,Mpers,Vpers,Mnum,Vbnum,Mgap,Vbgap),
    verb_bar(Vbar,Vpers,Vbnum,Vbgap),
    conj(Struc,vp(Modal,Vbar),Mpers,Mnum,vp).

%
%   'verb_bar' parses a verb followed by arguments, if any.
%   Auxiliaries can be handled as specifiers before the actual
%   verb is read.  Subcategorization (Scat) could also be made
%   more detailed.
%
%   verb_bar(
%       Struc : return structure,
%       Pers  : 'person' of subject (check for agreement),
%       Vnum  : 'number' of subject (check for agreement again),
%       Pgap  : transformed np from predicate (if any)
%   )
%
%   {Gapout = nogap} ensures that the parse doesn't end with
%   an unresolved structure being gapped.
%

%   A verb followed by the arguments selected by its subcategory,
%   returned as vbar(Verb,Pred).  Any gap passed in must have been
%   used up by the predicate.

verb_bar(vbar(Verb,Pred),Pers,Vnum,Pgap) -->
    verb(Verb,Pers,Vnum,Scat),
    predicate(Pred,Pgap,Gapout,Scat),
    {Gapout = nogap}.

%
%   'modal' accepts the specifier of a vp.  It should be
%   expanded to help compute the mood and tense of the
%   sentence.
%
%   modal(
%       Struc : return structure,
%       Mpers : 'person' of subject np,
%       Vpers : 'person' resulting from 'modal' ("nil" if found),
%       Mnum  : 'number' of subject np,
%       Vbnum : 'number' resulting from 'modal' ("inf" if found),
%       Mgap  : transformed modal (if any),
%       Vbgap : gap resulting from 'modal' (unchanged if found)
%   )
%

%   modal-1: a lexical modal is read from the input.  The incoming
%   gap is handed on unchanged; the following verb gets 'number'
%   "inf" and 'person' "nil".

modal(modal(Word),_Mpers,nil,_Mnum,inf,Gap,Gap) -->
    [Word],
    {lexmodal(Word)}.

%   modal-2: nothing is read.  A gap of the form modal(X), with X
%   not identical to "nil", is returned as the modal and the gap is
%   closed.  The test on X is made before 'Struc' is bound.

modal(Struc,_Mpers,nil,_Mnum,inf,Mgap,nogap) -->
    [],
    {Mgap = modal(X), X \== nil, Struc = Mgap}.

%   modal-3: no modal and no gap.  The subject's 'person' and
%   'number' pass straight through and modal(nil) is returned.

modal(modal(nil),Pers,Pers,Num,Num,nogap,nogap) -->
    [].

lexmodal(can).
lexmodal(could).
lexmodal(will).
lexmodal(would).
lexmodal(shall).
lexmodal(should).

%
%   'verb' parses the verb from the input if it is found in
%   the lexicon.  "lexverb" could contain more info on the
%   verb.
%
%   verb(
%       Struc : return structure,
%       Pers  : 'person' of the subject (for agreement check),
%       Vnum  : 'number' of the subject (for agreement check again!),
%       Scat  : SubCATegory of the verb =
%               dt (ditransitive : two objects) |
%               tv (transitive : one object) |
%               iv (intransitive : no objects)
%   )
%

%   verb-1: base form (second column of 'lexverb'); accepted when the
%   subject is not identical to third person or the number is plural.

verb(verb(Word),Pers,Vnum,Scat) -->
    [Word],
    {( Pers \== third ; Vnum = plur )},
    {lexverb(Scat,Word,_,_)}.

%   verb-2: third person singular form (third column).

verb(verb(Word),third,sing,Scat) -->
    [Word],
    {lexverb(Scat,_,Word,_)}.

%   verb-3: past form (fourth column); rejected when the number is
%   "inf", i.e. directly after a modal.

verb(verb(Word),_Pers,Vnum,Scat) -->
    [Word],
    {Vnum \== inf},
    {lexverb(Scat,_,_,Word)}.

%   lexverb(Scat,Base,ThirdSingular,Past)
lexverb(dt,give,gives,gave).
lexverb(tv,have,has,had).
lexverb(tv,see,sees,saw).
lexverb(iv,go,goes,went).
lexverb(tv,want,wants,wanted).
lexverb(tv,drive,drives,drove).

%
%   'predicate' parses the subcategorized dt, tv, or iv arguments
%   of the verb.
%
%   predicate(
%       Struc : return structure,
%       Pgap  : transformed np gap (if any),
%       Gapout: output any unresolved gap,
%       Scat  : SubCATegory to be returned
%   )
%

%   predicate-1 (dt): two noun phrases.  Only the second one may be
%   filled from the gap.

predicate(pred(Np1,Np2),Pgap,Gapout,dt) -->
    noun_phrase(Np1,_,_,nogap,_),
    noun_phrase(Np2,_,_,Pgap,Gapout).

%   predicate-2 (tv): one noun phrase, which may be filled from the
%   gap.

predicate(pred(Np),Pgap,Gapout,tv) -->
    noun_phrase(Np,_,_,Pgap,Gapout).

%   predicate-3 (iv): no arguments; the gap is passed out unchanged.

predicate(pred(nil),Gap,Gap,iv) -->
    [].

%%%%%%%%%
%   'prep_phrase' does the obvious.  Gapping could be introduced
%   to handle transformed pp's (but I doubt it :).
%
%   prep_phrase(
%       Struc : return structure
%   )
%
%%%%%%%%%

%   A preposition followed by an ungapped noun phrase, optionally
%   extended by a conjoined pp.

prep_phrase(Struc) -->
    preposition(Prep),
    noun_phrase(Object,_,_,nogap,_),
    conj(Struc,pp(Prep,Object),_,_,pp).

%   A word found in 'lexprep', returned as prep(Word).
preposition(prep(Word)) -->
    [Word],
    {lexprep(Word)}.

lexprep(to).
lexprep(from).
lexprep(by).
lexprep(of).
lexprep(for).
lexprep(with).

%%%%%%%%%
%   'conj' will parse off a conjunction followed by a constituent
%   of category 'Cat'.  The result will be the right sister of
%   the previously parsed structure passed in.
%
%   conj(
%       OutStruc : result structure from conj,
%       InStruc  : previous structure parsed,
%       Arg1     : first constraint on constituent,
%       Arg2     : second constraint on constituent,
%       Cat      : category of new structure to be parsed
%   )
%
%   By McCawley's usage (McCawley 1988, Vol 1 & 2), constituents
%   should only be conjoined to others of the same category; ie.
%   np "and" np, vp "or" vp, etc.  If no conjunction is found
%   (conj-2,3), then the result structure is unchanged.
%
%%%%%%%%%

%   conj-1: a conjunction followed by a constituent of category
%   'Cat'.  The result is a term with functor 'Cat' holding the
%   previous structure, the conjunction and the new constituent.

conj(OutStruc,InStruc,Arg1,Arg2,Cat) -->
    conjunction(Conj,Num),
    construct(Right,Cat,Arg1,Arg2,Num),
    {OutStruc =.. [Cat,InStruc,Conj,Right]}.

%   conj-2: no conjunction after a vp; the constraints are left
%   untouched.

conj(Struc,Struc,_,_,vp) -->
    [].

%   conj-3: no conjunction, any category; the two constraints are
%   unified with each other.

conj(Struc,Struc,Arg,Arg,_) -->
    [].

%   A word found in 'lexconj', returned as conj(Word) together with
%   the 'number' recorded for it.

conjunction(conj(Word),Num) -->
    [Word],
    {lexconj(Word,Num)}.

lexconj(and,plur).
lexconj(or,sing).

%
%   the meaning of the last three args for 'construct' depends
%   on which constituent is being parsed.  For np, the number
%   of the conjoined np is the 'number' of the first conjunction.
%   This is just a convenient heuristic.  For vp, the person 
%   and number must still agree across conjunction.  For pp,
%   no such constraints are necessary.
%

%   np: the second constraint is unified with the 'number' supplied
%   by the conjunction; the conjoined np itself is unconstrained.
construct(Np,np,_,Num,Num) -->
    noun_phrase(Np,_,_,nogap,_).

%   vp: the conjoined vp is parsed under the same 'person' and
%   'number'; the conjunction's 'number' is ignored.
construct(Vp,vp,Pers,Vnum,_) -->
    verb_phrase(Vp,Pers,Vnum,nogap).

%   pp: no constraints.
construct(Pp,pp,_,_,_) -->
    prep_phrase(Pp).

%%%%%%%%%
%   'read_sentence' provides the ability to get input
%   in a natural fashion by typing in words separated
%   by spaces and terminated with a period.  Adapted
%   from _Prolog and Natural Language Analysis_ by
%   Pereira and Shieber.
%
%%%%%%%%%

%   read_sentence(Input)
%       Input : list of words read from the current input, each
%               converted from its character codes with name/2.
%
%   Words are separated by blanks, tabs or line breaks, so a line
%   break left in the input (e.g. after typing the query) is skipped
%   instead of becoming part of a word.  The sentence ends at the
%   first period, or at end of input if no period is found.

read_sentence(Input) :- get0(Char), read_sentence(Char,Input).

%   read_sentence(Char,Input): as above, with 'Char' the character
%   already read but not yet used.

read_sentence(Char,[]) :- sentence_end(Char),!.
read_sentence(Char,Input) :- space(Char),!,get0(Char1),
    read_sentence(Char1,Input).
read_sentence(Char,[Word|Words]) :- read_word(Char,Chars,Next),
    name(Word,Chars),
    read_sentence(Next,Words).

%   read_word(Char,Chars,Last)
%       Char  : first character of the word (already read),
%       Chars : character codes of the word,
%       Last  : the separator or terminator that ended the word.

read_word(C,[],C) :- space(C),!.
read_word(C,[],C) :- sentence_end(C),!.
read_word(Char,[Char|Chars],Last) :- get0(Next), read_word(Next,Chars,Last).

%   word separators: blank, tab, line feed, carriage return.

space(32).
space(9).
space(10).
space(13).

period(46).

%   a sentence ends at a period or at end of input; without the
%   latter, reading would never stop on input lacking a period.

sentence_end(C) :- period(C).
sentence_end(C) :- end_of_input(C).

%   -1 is assumed to be the code get0/1 reports at end of input;
%   confirm for the Prolog system in use.

end_of_input(-1).