/* GB.PL */
%%%%%%%%%
% "Gibberish" -- A GB'ish parser!
%
% A first attempt at a Government-Binding (GB) type
% parser.  This parser is intended as an introductory
% 'toy' parser for NLU courses, so complexity will be
% kept to a minimum while still being general enough
% to be easily extended.  All constraints have
% been implemented as explicit prolog goals for
% perspicuity.  Many optimizations are possible!  The
% gapping mechanism actually performs a transformation
% of the sentence into a 'normal' form and so has been
% made general enough to move arbitrary structures
% through the parse tree.
%
% Author:  Cameron Shelley
% Address: cpshelley@violet.waterloo.edu
%          University of Waterloo
%
/* GB.PL */
% Comments are welcome!
%
% This software is released into the public domain on the
% condition that the author is cited as such, and all
% modifications remain in the public domain; and this
% condition is imposed on all subsequent users.
%
% Modification History:
% ---------------------
% Jan 17/91 - creation
% Feb  4/91 - fixed modal(nil) matching bug in "modal-2".
% Feb  4/91 - added general conjunction rule "conj".
%             (idea from Steve Green -- thanks Steve!)
%
%%%%%%%%%

%
% parse/0 is the top-level driver: it reads one sentence with
% read_sentence/1, parses the resulting word list with sentence//1
% (the whole list must be consumed, hence the [] remainder) and
% hands the parse structure to print_struct/1.  It fails if the
% input has no parse.
%
% NOTE(review): print_struct/1 is not defined in the part of the
% file visible here -- presumably it lives elsewhere; confirm.
%
parse :-
    read_sentence(Sentence),
    sentence(Struc, Sentence, []),
    print_struct(Struc).

%%%%%%%%%
% 'sentence' will parse the basic np,vp structure
% at the top level.  Different sentence types will
% be added (ie. questions).
%
% sentence(
%       Struc : return structure from sentence call
%       )
%
%%%%%%%%%

%
% normal sentence
%
% The subject np is parsed with no gap, its number is passed on to
% the verb phrase for agreement, and the result is s(Np,Vp).
%
sentence(Struc) -->
    noun_phrase(Np, Pers, Nnum, nogap, nogap),
    {Nnum = Vnum},
    verb_phrase(Vp, Pers, Vnum, nogap),
    {Struc = s(Np, Vp)}.

%
% question with modal transformed to initial position
%
% The fronted modal M is handed to the verb phrase as its gap, so
% that it is put back in its 'normal' position inside the vp.  The
% result is q(Np,Vp).
%
sentence(Struc) -->
    modal(M, _, _, _, _, nogap, _),
    noun_phrase(Np, Pers, Nnum, nogap, nogap),
    {Nnum = Vnum},
    verb_phrase(Vp, Pers, Vnum, M),
    {Struc = q(Np, Vp)}.

%%%%%%%%%
% 'noun_phrase' will parse the various types of np's and
% should subcategorize between np's and sbars at some point.
% Also, proper nouns and pronouns can be treated as special
% np's in this system.
%
% noun_phrase(
%       Struc : return structure,
%       Pers  : np 'person' = first | second | third,
%       Nnum  : np 'number' = sing | plur,
%       Gap   : transformed np (if any),
%       Gapout: output gap if Gap not resolved
%       )
%
%%%%%%%%%

%
% Gap clause: consume no input and use the gapped np(...) term
% itself as the structure.  The gap is thereby resolved, so the
% outgoing gap is 'nogap'.  Person and number are left unbound.
%
noun_phrase(Struc, _Pers, _Nnum, Gap, Gapout) -->
    [],
    {Gap =.. [np|_]},
    {Gapout = nogap},
    {Struc = Gap}.

%
% Lexical clause: determiner followed by a noun_bar that agrees in
% number.  Person is always 'third'.  The incoming gap is passed on
% unresolved.  conj//5 may then attach a conjoined np and decides
% the number (Num) reported to the caller.
%
noun_phrase(Struc, Pers, Num, Gap, Gapout) -->
    determiner(Det, Dnum),
    {Nnum = Dnum},
    noun_bar(Nbar, Nnum),
    {Pers = third},
    {Gapout = Gap},
    {InStruc = np(Det, Nbar)},
    conj(Struc, InStruc, Nnum, Num, np).

%
% determiner is the noun phrase specifier
%
% determiner(
%       Struc : return structure,
%       Dnum  : det 'number' = sing | plur
%       )
%
% No determiner is considered to pluralize the np, ie:
% "cats go" but not *"cat goes".  The default could be
% changed to "all" or "some" if desired.
%
determiner(Struc, Dnum) -->
    [Word],
    {lexdet(Word, Dnum)},
    {Struc = det(Word)}.
determiner(Struc, Dnum) -->
    [],
    {Dnum = plur},
    {Struc = det(nil)}.

% lexdet(Word, Number): determiner lexicon.  "the" leaves the
% number unconstrained.
lexdet(the, _).
lexdet(a, sing).
lexdet(an, sing).

%
% noun_bar is here just a noun with arguments.  A treatment
% of adjectives should be added.
%
% noun_bar(
%       Struc : return structure,
%       Nnum  : noun 'number' parsed = sing | plur
%       )
%
noun_bar(Struc, Nnum) -->
    noun(N, Nnum),
    noun_args(Nmod),
    {Struc = nbar(N, Nmod)}.

%
% mass nouns should be considered as noun_bars in this system!
%
% The first clause accepts the singular form (first lexnoun
% argument), the second the plural form (second argument).
%
noun(Struc, Nnum) -->
    [Word],
    {Nnum = sing},
    {lexnoun(Word, _)},
    {Struc = noun(Word)}.
noun(Struc, Nnum) -->
    [Word],
    {Nnum = plur},
    {lexnoun(_, Word)},
    {Struc = noun(Word)}.

% lexnoun(Singular, Plural): noun lexicon.
lexnoun(cat, cats).
lexnoun(dog, dogs).
lexnoun(stamp, stamps).
lexnoun(office, offices).
lexnoun(car, cars).
lexnoun(man, men).
lexnoun(chance, chances).
lexnoun(house, houses).
lexnoun(bar, bars).

%
% noun_args here allows only pp's or nil's.  Handling of
% embedded sentences can be added as suggested.
%
% noun_args(
%       Struc : return structure
%       )
%
noun_args(Struc) -->
    prep_phrase(Pp),
    {Struc = n_args(Pp)}.
% Disabled alternative: embedded sentences as noun arguments.
%noun_args(Struc) -->
%    sentence_bar(Sb),
%    {Struc = n_args(Sb)}.

% A noun with no arguments: consume nothing, yield n_args(nil).
noun_args(Struc) -->
    [],
    {Struc = n_args(nil)}.

%%%%%%%%%
% 'verb_phrase' will parse off the predicate of a sentence.
% Auxiliaries could be added as suggested.  Sensitivity to
% tense would also be handy.
%
% verb_phrase(
%       Struc : return structure,
%       Mpers : 'person' of subject input to modal,
%       Mnum  : 'number' of subject input to modal,
%       Mgap  : gap (if any) input to modal
%       )
%
% 'Xpers' and 'Xnum' represent constraints passed to the
% verb phrase which may be altered by the components and
% passed to the next component as 'Ypers' or 'Ynum', ie.
% Mpers ==> Vpers.
%
%%%%%%%%%
verb_phrase(Struc, Mpers, Mnum, Mgap) -->
    % modal//7 turns the subject's constraints (Mpers, Mnum, Mgap)
    % into the ones that apply to the verb (Vpers, Vbnum, Vbgap).
    modal(M, Mpers, Vpers, Mnum, Vbnum, Mgap, Vbgap),
    verb_bar(Vb, Vpers, Vbnum, Vbgap),
    {InStruc = vp(M, Vb)},
    % A conjoined vp is checked against the subject's own
    % person and number.
    conj(Struc, InStruc, Mpers, Mnum, vp).

%
% 'verb_bar' parses a verb followed by arguments, if any.
% Auxiliaries can be handled as specifiers before the actual
% verb is read.  Subcategorization (Scat) could also be made
% more detailed.
%
% verb_bar(
%       Struc : return structure,
%       Pers  : 'person' of subject (check for agreement),
%       Vnum  : 'number' of subject (check for agreement again),
%       Pgap  : transformed np from predicate (if any)
%       )
%
% {Gapout = nogap} ensures that the parse doesn't end with
% an unresolved structure being gapped.
%
verb_bar(Struc, Pers, Vnum, Pgap) -->
    verb(V, Pers, Vnum, Scat),
    predicate(P, Pgap, Gapout, Scat),
    {Gapout = nogap},
    {Struc = vbar(V, P)}.

%
% 'modal' accepts the specifier of a vp.  It should be
% expanded to help compute the mood and tense of the
% sentence.
%
% modal(
%       Struc : return structure,
%       Mpers : 'person' of subject np,
%       Vpers : 'person' resulting from 'modal' ("nil" if found),
%       Mnum  : 'number' of subject np,
%       Vbnum : 'number' resulting from 'modal' ("inf" if found),
%       Mgap  : transformed modal (if any),
%       Vbgap : gap resulting from 'modal' (unchanged if found)
%       )
%

% modal-1: a modal word is present in the input.  The verb that
% follows is then infinitive (Vbnum = inf, Vpers = nil) and any
% incoming gap is passed on untouched.
modal(Struc, _Mpers, Vpers, _Mnum, Vbnum, Mgap, Vbgap) -->
    [Word],
    {lexmodal(Word)},
    {Vbgap = Mgap},
    {Vbnum = inf},
    {Vpers = nil},
    {Struc = modal(Word)}.

% modal-2: no modal word here, but a real modal was gapped (moved
% to sentence-initial position).  The gap is resolved in place;
% modal(nil) is rejected explicitly so that an empty modal is never
% treated as a moved one.
modal(Struc, _Mpers, Vpers, _Mnum, Vbnum, Mgap, Vbgap) -->
    [],
    {Mgap =.. [modal|[X]]},
    {X \== nil},
    {Vbgap = nogap},
    {Vbnum = inf},
    {Vpers = nil},
    {Struc = Mgap}.

% modal-3: no modal at all.  The subject's person and number go
% straight through to the verb for agreement checking.
modal(Struc, Mpers, Vpers, Mnum, Vbnum, Mgap, Vbgap) -->
    [],
    {Mgap = nogap},
    {Vbgap = nogap},
    {Vbnum = Mnum},
    {Vpers = Mpers},
    {Struc = modal(nil)}.

% lexmodal(Word): modal lexicon.
lexmodal(can).
lexmodal(could).
lexmodal(will).
lexmodal(would).
lexmodal(shall).
lexmodal(should).

%
% 'verb' parses the verb from the input if it is found in
% the lexicon.  "lexverb" could contain more info on the
% verb.
%
% verb(
%       Struc : return structure,
%       Pers  : 'person' of the subject (for agreement check),
%       Vnum  : 'number' of the subject (for agreement check again!),
%       Scat  : SubCATegory of the verb =
%               dt (ditransitive : two objects) |
%               tv (transitive   : one object)  |
%               iv (intransitive : no objects)
%       )
%

% Base form: any subject that is not third person, or a plural
% one.  This is also the form used after a modal, where Pers is
% 'nil' and Vnum is 'inf'.
verb(Struc, Pers, Vnum, Scat) -->
    [Word],
    {( Pers \== third ; Vnum = plur )},
    {lexverb(Scat, Word, _, _)},
    {Struc = verb(Word)}.

% Third person singular form.
verb(Struc, Pers, Vnum, Scat) -->
    [Word],
    {Pers = third},
    {Vnum = sing},
    {lexverb(Scat, _, Word, _)},
    {Struc = verb(Word)}.

% Past form: accepted with any subject, but never after a modal
% (Vnum = inf).
verb(Struc, _Pers, Vnum, Scat) -->
    [Word],
    {Vnum \== inf},
    {lexverb(Scat, _, _, Word)},
    {Struc = verb(Word)}.

% lexverb(Scat, Base, ThirdSingular, Past): verb lexicon.
lexverb(dt, give, gives, gave).
lexverb(tv, have, has, had).
lexverb(tv, see, sees, saw).
lexverb(iv, go, goes, went).
lexverb(tv, want, wants, wanted).
lexverb(tv, drive, drives, drove).

%
% 'predicate' parses the subcategorized dt, tv, or iv arguments
% of the verb.
%
% predicate(
%       Struc : return structure,
%       Pgap  : transformed np gap (if any),
%       Gapout: output any unresolved gap,
%       Scat  : SubCATegory to be returned
%       )
%

% Ditransitive: two np's.  Only the second one may be filled from
% the gap.
predicate(Struc, Pgap, Gapout, Scat) -->
    {Scat = dt},
    noun_phrase(Np1, _, _, nogap, _),
    noun_phrase(Np2, _, _, Pgap, Gapout),
    {Struc = pred(Np1, Np2)}.

% Transitive: one np, which may be filled from the gap.
predicate(Struc, Pgap, Gapout, Scat) -->
    {Scat = tv},
    noun_phrase(Np, _, _, Pgap, Gapout),
    {Struc = pred(Np)}.

% Intransitive: no arguments; the gap is passed on unresolved.
predicate(Struc, Pgap, Gapout, Scat) -->
    {Scat = iv},
    [],
    {Gapout = Pgap},
    {Struc = pred(nil)}.

%%%%%%%%%
% 'prep_phrase' does the obvious.  Gapping could be introduced
% to handle transformed pp's (but I doubt it :).
%
% prep_phrase(
%       Struc : return structure
%       )
%
%%%%%%%%%
prep_phrase(Struc) -->
    preposition(P),
    noun_phrase(Np, _, _, nogap, _),
    {InStruc = pp(P, Np)},
    conj(Struc, InStruc, _, _, pp).

preposition(Struc) -->
    [Word],
    {lexprep(Word)},
    {Struc = prep(Word)}.

% lexprep(Word): preposition lexicon.
lexprep(to).
lexprep(from).
lexprep(by).
lexprep(of).
lexprep(for).
lexprep(with).

%%%%%%%%%
% 'conj' will parse off a conjunction followed by a constituent
% of category 'Cat'.  The result will be the right sister of
% the previously parsed structure passed in.
%
% conj(
%       OutStruc : result structure from conj,
%       InStruc  : previous structure parsed,
%       Arg1     : first constraint on constituent,
%       Arg2     : second constraint on constituent,
%       Cat      : category of new structure to be parsed
%       )
%
% By McCawley's usage (McCawley 1988, Vol 1 & 2), constituents
% should only be conjoined to others of the same category; ie.
% np "and" np, vp "or" vp, etc.  If no conjunction is found
% (conj-2,3), then the result structure is unchanged.
%
%%%%%%%%%

% conj-1: a conjunction word followed by another constituent of
% the same category.  The result is Cat(InStruc, conj(Word), Constr).
conj(OutStruc, InStruc, Arg1, Arg2, Cat) -->
    conjunction(C, Num),
    construct(Constr, Cat, Arg1, Arg2, Num),
    {OutStruc =.. [Cat, InStruc, C, Constr]}.

% conj-2: no conjunction after a vp.  Arg1 and Arg2 are person and
% number here and must not be unified with each other.
conj(Struc, Struc, _, _, vp) -->
    [].

% conj-3: no conjunction after any other category.  The second
% constraint simply takes the value of the first (for an np: the
% resulting number is the number of the np itself).
conj(Struc, Struc, Arg, Arg, _) -->
    [].

conjunction(conj(Word), Num) -->
    [Word],
    {lexconj(Word, Num)}.

% lexconj(Word, Number): conjunction lexicon with the 'number' the
% conjunction gives to a conjoined np.
lexconj(and, plur).
lexconj(or, sing).
%
% the meaning of the last three args for 'construct' depend
% on which constituent is being parsed.  For np, the number
% of the conjoined np is the 'number' of the first conjunction.
% This is just a convenient heuristic.  For vp, the person
% and number must still agree across conjunction.  For pp,
% no such constraints are necessary.
%
construct(Struct, np, _, Num, Num) -->
    noun_phrase(Struct, _, _, nogap, _).
construct(Struct, vp, Pers, Vnum, _) -->
    verb_phrase(Struct, Pers, Vnum, nogap).
construct(Struct, pp, _, _, _) -->
    prep_phrase(Struct).

%%%%%%%%%
% 'read_sentence' provides the ability to get input
% in a natural fashion by typing in words separated
% by spaces and terminated with a period.  Adapted
% from _Prolog and Natural Language Analysis_ by
% Pereira and Shieber.
%
% read_sentence(
%       Input : list of atoms, one per word read
%       )
%
% Words may also be separated by tabs or line breaks, and
% end of input ends the sentence just like a period does,
% so the reader always terminates.
%
%%%%%%%%%
read_sentence(Input) :-
    get0(Char),
    read_sentence(Char, Input).

% read_sentence(Char, Words): Char is the character code that has
% already been read but not yet consumed.
read_sentence(Char, []) :-
    period(Char), !.
read_sentence(Char, []) :-
    % Without this clause a missing period would make the reader
    % collect end-of-input codes forever.
    end_of_input(Char), !.
read_sentence(Char, Input) :-
    space(Char), !,
    get0(Char1),
    read_sentence(Char1, Input).
read_sentence(Char, [Word|Words]) :-
    read_word(Char, Chars, Next),
    name(Word, Chars),
    read_sentence(Next, Words).

% read_word(Char, Chars, Last): collects the codes of one word,
% starting with Char, into Chars.  Last is the code that ended the
% word; it is handed back unconsumed so the caller can act on it.
read_word(C, [], C) :-
    space(C), !.
read_word(C, [], C) :-
    period(C), !.
read_word(C, [], C) :-
    end_of_input(C), !.
read_word(Char, [Char|Chars], Last) :-
    get0(Next),
    read_word(Next, Chars, Last).

% Word separators: blank, tab, line feed and carriage return.
space(32).
space(9).
space(10).
space(13).

% Sentence terminator: ".".
period(46).

% Code delivered by get0/1 at end of input.
% NOTE(review): -1 is assumed here; some older systems use 26
% instead -- confirm for the target Prolog.
end_of_input(-1).