Path: utzoo!utgpu!jarvis.csri.toronto.edu!rutgers!cs.utexas.edu!uunet!mcvax!ukc!etive!aiai!ken From: ken@aiai.ed.ac.uk (Ken Johnson) Newsgroups: comp.lang.prolog Subject: Definite clause grammar Message-ID: <518@skye.ed.ac.uk> Date: 2 Jun 89 16:49:17 GMT Reply-To: ken@aiai.UUCP (Ken Johnson) Organization: AIAI, University of Edinburgh, Scotland Lines: 386 This may be of interest to readers of this group. It is a version of the definite clause grammar mechanism. The dialect of Prolog I have used is Edinburgh Prolog, but there is nothing in here so wierd it won't work on most dialects. I wrote this for a short course in Prolog to illustrate how the mechanism works: I hope you may find it useful. I have tested this code with the example grammars given, but I do not guarantee it. There are four files here dcg.pl Definite clause grammar. Instructions for use are in this file. roman.dcg Definite clause grammar for roman numerals numbers.dcg Definite clause grammar for numbers time.dcg Definite clause grammar for times of day Be warned that the time and numbers dcg's use British English, i.e. `a quarter past two' (not `after'); `three hundred and one' (not `three hundred one'). Enjoy it! (But please do not sell copies at a profit unless I get a share.) # This is a shell archive. # Remove everything above and including the cut line. # Then run the rest of the file through sh. -----cut here-----cut here-----cut here-----cut here----- #!/bin/sh # shar: Shell Archiver # Run the following text with /bin/sh to create: # dcg.pl # numbers.dcg # time.dcg # roman.dcg # This archive created: Fri Jun 2 17:43:11 1989 cat << \SHAR_EOF > dcg.pl % This implements a DCG in a Prolog that doesn't have one. % It is a fairly good example of how to manipulate terms using % functor/3 and arg/3. % % To load a grammar, `load_grammar(File).' % To use it, `my_phrase(Struct_name,Text)' % Terminals appear as thing --> [Terminal,...] % Nonterminals and bits of Prolog are standard. % % To rewrite a single term call rewrite/2, e.g. % rewrite((det-->[the]),Text). % % Ken Johnson, 2 June 1989 % In a system that did not have dcgs you would probably also have to % define % :- op(1200,xfx,'-->'). % :- op(901,fx,'{'). % :- op(900,xf,'}'). load_grammar(File) :- % Load the file by seeing(S), % repeatedly loading terms see(File), % and transforming them repeat, read(Term), ( Term == end_of_file ; process_grammar_term(Term), fail ), !, seen, see(S). process_grammar_term(Term) :- % Attempt to rewrite; complain if rewrite(Term,Rewritten), % can't rewrite a term whose principal !, % functor is --> assert(Rewritten). process_grammar_term(Term) :- write('Cannot rewrite: '), write(Term), nl. % In a system the does not have `phrase' you could rename this % routine to `phrase'. It is identical to `phrase'; the call sequence % is e.g. my_phrase(non_terminal(T),[text...]) my_phrase(Term,Text) :- add_two_args(Term,New_term,Text,[]), New_term. rewrite((Term-->Sequence),Out) :- add_two_args(Term,New_term,A,B), transform(Sequence,A,B,Transformed), trim_null_output(Transformed,New_term,(New_term :- Transformed),Out), !. transform((P,Q),A,B,U) :- transform_2(P,A,C), !, transform(Q,C,B,U). transform((P,Q),A,B,Out) :- transform_1(P,A,C,T), !, transform(Q,C,B,U), trim_null_output(U,T,(T,U),Out). transform(P,A,B,true) :- transform_2(P,A,B), !. transform(P,A,B,U) :- transform_1(P,A,B,U). transform_1({Prolog},A,A,Prolog) :- !. transform_1(T,A,B,New_term) :- add_two_args(T,New_term,A,B). transform_2([H|T],A,B) :- split([H|T],A,B), !. transform_2([],A,A) :- !. add_two_args(T,New_term,A,B) :- functor(T,F,N), N1 is N + 1, N2 is N + 2, functor(New_term,F,N2), copy_args(N,T,New_term), arg(N1,New_term,A), arg(N2,New_term,B). copy_args(0,_,_). copy_args(N,T,U) :- arg(N,T,A), arg(N,U,A), M is N - 1, copy_args(M,T,U). split([H|T],[H|U],V) :- split(T,U,V). split([],V,V). % % This is a device to allow me to replace % x,y,true. by x,y. % and p :- true. by p. % trim_null_output(true,Out,_,Out) :- !. trim_null_output(_,_,Out,Out). SHAR_EOF cat << \SHAR_EOF > numbers.dcg % Grammar for numbers, e.g. % phrase(number(I),[two,hundred,and,fifty,six]). % An astonishing characteristic of this code is that it's % fully bidirectional. The expression % phrase(number(256),Words) % will instantiate Words = [two,hundred,and,fifty,six]. % What's more, % phrase(number(I),Words) % will eventually instantiate I and Words to all the numbers it knows. % % Ken Johnson 17-9-87 number(I) --> '1_to_999'(I). number(I) --> '1000_to_999999'(I). '1_to_999'(I) --> '1_to_99'(I). '1_to_999'(I) --> '100_to_999'(I). % Compound number with thousands '1000_to_999999'(I) --> thousands(I). '1000_to_999999'(I) --> thousands(K),'100_to_999'(Htu), { I is K + Htu }. '1000_to_999999'(I) --> thousands(K),[and],'1_to_99'(Tu), { I is K + Tu }. % Thousands thousands(I) --> '1_to_999'(K), [thousand], { I is K * 1000 }. % Compound number with hundreds, tens and units '100_to_999'(C) --> one_to_nine(H),[hundred], { C is 100 * H }. '100_to_999'(C) --> one_to_nine(H),[hundred],[and],'1_to_99'(Tu), { C is (100 * H) + Tu }. % Complete number: a single word 1-9 or 10-19 '1_to_99'(I) --> one_to_nine(I). '1_to_99'(I) --> ten_to_nineteen(I). '1_to_99'(I) --> multiple_of_ten(I). '1_to_99'(I) --> '20_to_99'(I). % Compound number with tens and units '20_to_99'(I) --> multiple_of_ten(T), one_to_nine(U), { I is T + U }. % Single words (terminal nodes) one_to_nine(1) --> [one]. one_to_nine(2) --> [two]. one_to_nine(3) --> [three]. one_to_nine(4) --> [four]. one_to_nine(5) --> [five]. one_to_nine(6) --> [six]. one_to_nine(7) --> [seven]. one_to_nine(8) --> [eight]. one_to_nine(9) --> [nine]. ten_to_nineteen(10) --> [ten]. ten_to_nineteen(11) --> [eleven]. ten_to_nineteen(12) --> [twelve]. ten_to_nineteen(13) --> [thirteen]. ten_to_nineteen(14) --> [fourteen]. ten_to_nineteen(15) --> [fifteen]. ten_to_nineteen(16) --> [sixteen]. ten_to_nineteen(17) --> [seventeen]. ten_to_nineteen(18) --> [eighteen]. ten_to_nineteen(19) --> [nineteen]. multiple_of_ten(20) --> [twenty]. multiple_of_ten(30) --> [thirty]. multiple_of_ten(40) --> [forty]. multiple_of_ten(50) --> [fifty]. multiple_of_ten(60) --> [sixty]. multiple_of_ten(70) --> [seventy]. multiple_of_ten(80) --> [eighty]. multiple_of_ten(90) --> [ninety]. SHAR_EOF cat << \SHAR_EOF > time.dcg % Telling the time DCG. Needs "numbers.dcg" % Ken Johnson 18-9-87 % Note that : is declared an operator (prec 600, xfx) and it can therefore % be used without a declaration. time(H:0) --> hour(H), [o_clock]. time(H:0) --> named_time(H). time(H:15) --> [a,quarter,past],hour(H). time(H:30) --> [half,past],hour(H). time(H:45) --> [a,quarter,to],hour(Hprev), { ( H = 12 -> Hprev = 1 ; H is Hprev - 1 ) }. time(H:M) --> number(M),minutes,[past],hour(H), { M >= 1, M =< 29 }. time(H:M) --> number(Mp),minutes,[to],hour(Hp), { Mp >= 1, Mp =< 29, M is 60 - Mp, ( H = 12 -> Hp = 1 ; H is Hp - 1 ) }. hour(I) --> number(I), { I >= 1, I =< 12 }. hour(I) --> named_time(I). named_time(12) --> [midnight]. named_time(12) --> [noon]. named_time(12) --> [midday]. % Minutes -- a word that may be present and may not minutes --> [minutes]. minutes --> []. SHAR_EOF cat << \SHAR_EOF > roman.dcg % Roman numerals up to 1999. (There is no special notation for 5000, hence % 4000 is mmmm, 5000 is mmmmm and that is not interesting. So I have % stopped at allowing at most one 'm'). % Bidirectional. Call with either % ?- phrase(roman(1989),List) % or % ?- phrase(roman(N),[c,d,i,i]). roman(N) --> thousands(K), hundreds(H), tens(T), units(U), { N is K * 1000 + H * 100 + T * 10 + U }. thousands(0) --> []. thousands(1) --> [m]. hundreds(N) --> legal_sequence(c,d,m,N). tens(N) --> legal_sequence(x,l,c,N). units(N) --> legal_sequence(i,v,x,N). % The predicate `legal_sequence' exploits the fact that each group % of Roman symbols is used in the same combinations, e.g. % i, ii, iii, iv, v, vi, vii, viii, ix is paralleled by % x, xx, xxx, xl, l, lx, lxx, lxxx, xc and by % c, cc, ccc, cd, d, dc, dcc, dccc, cm legal_sequence(_,_,_,0) --> []. legal_sequence(Single,Five,Ten,N) --> [Single], follows_single(Single,Five,Ten,N). legal_sequence(Single,Five,Ten,N) --> [Five], follows_five(Single,Five,Ten,N). follows_single(_,_,_,1) --> []. follows_single(Single,_,_,N) --> one_or_two(Single,V), { N is V + 1 }. follows_single(_,Five,_,4) --> [Five]. follows_single(_,_,Ten,9) --> [Ten]. follows_five(_,_,_,5) --> []. follows_five(Single,_,_,N) --> one_two_or_three(Single,V), {N is V + 5}. one_two_or_three(Single,N) --> one_or_two(Single,N). one_two_or_three(Single,3) --> [Single,Single,Single]. one_or_two(Single,1) --> [Single]. one_or_two(Single,2) --> [Single,Single]. SHAR_EOF # End of shell archive exit 0 -- Ken Johnson, AI Applications Institute, 80 South Bridge, Edinburgh EH1 1HN E-mail ken@aiai.ed.ac.uk, phone 031-225 4464 extension 212 `I have read your article, Mr. Johnson, and I am no wiser than when I started.' -- `Possibly not, sir, but far better informed.'