Path: utzoo!utgpu!news-server.csri.toronto.edu!cs.utexas.edu!sun-barr!newstop!texsun!letni!mic!convex!convex.COM From: tchrist@convex.COM (Tom Christiansen) Newsgroups: comp.lang.perl Subject: Re: Matching parentheses Message-ID: <109563@convex.convex.com> Date: 30 Nov 90 07:15:55 GMT References: <1990Nov28.150131.28981@ugle.unit.no> <16600@csli.Stanford.EDU> <5209:Nov2909:58:4190@kramden.acf.nyu.edu> Sender: news@convex.com Reply-To: tchrist@convex.COM (Tom Christiansen) Organization: CONVEX Software Development, Richardson, TX Lines: 176 In article <5209:Nov2909:58:4190@kramden.acf.nyu.edu> brnstnd@kramden.acf.nyu.edu (Dan Bernstein) writes: > [ 7-line sed program deleted ] While it's always good to choose the right task, and we shouldn't forget our simpler tools, I don't think the original poster has yet had his question answered. What he wanted was not merely WHETHER an expression was balanced, but rather how to pull out a balanced expression from a variable in a Perl program for subsequent use in that program. Enclosed below you will find some code that seems to do this. There's a &pull_quotes() function that you want to feed two things: the string with quoted strings in it, and the two characters that you want to call quotes. My enclosed examples demo using ` and ', as well as ( and ). Multi-character quotes are left as an exercise for the reader. :-) The function returns a list of all the quoted strings, as you encounter them proceeding left to right. For example, &pull_quotes('value = (double)(U_L(value) & U_L(str_gnum(st[2])));', '()'); returns ( "double", "U_L(value) & U_L(str_gnum(st[2]))", "value", "str_gnum(st[2])", "st[2]" ); Several examples of varying complexity are included. I'll spare the net the 8k of expected output these produce. You should be able to massage this into whatever you want to do with these matched, quoted strings. I eagerly await Dan's translation of this into sed. 
# :-) --tom
#!/usr/local/bin/perl
#
# pull_quotes -- tom christiansen, tchrist@convex.com
#
################################################################################
# show_quotes($string, $qchars)
#
# Extracts every quoted substring of $string with pull_quotes() and prints a
# small report on STDOUT: a summary line, one "Quote #N" line per extracted
# string (numbered from 0, in the order the opening quotes appear), and a
# trailing blank line.
#
#   $string  text to search
#   $qchars  two-character string: opening quote char, then closing quote char
#
# Dies (via pull_quotes) when $qchars is not exactly two characters long.
sub show_quotes {
    my ($string, $qchars) = @_;
    my @list = pull_quotes($string, $qchars);

    # Show the delimiter pair as "L...R" in the summary line.
    (my $label = $qchars) =~ s/(.)(.)/$1...$2/;

    print "Extracted ", scalar(@list), " ", $label,
          " quote", (@list != 1 ? 's' : ''),
          " from: <<", $string, ">>\n";

    my $i = 0;
    for my $quote (@list) {
        # An unterminated quote comes back as undef; print it as empty.
        print "Quote #$i is <<", (defined $quote ? $quote : ''), ">>\n";
        $i++;
    }
    print "\n";
}
################################################################################
# pull_quotes($string, $quotchars) => @quotestrings
#
# Returns the text found between each balanced pair of quote characters in
# $string, including nested pairs.  Results are ordered by the position of
# the opening quote character, left to right.
#
#   $string     text to search
#   $quotchars  two-character string: opening quote char, then closing quote
#               char, e.g. "()" or "`'".  The two should be distinct; if they
#               are the same, every occurrence is treated as an opening quote.
#
# A backslash followed by any character other than newline protects that
# character from being treated as a quote; the backslash itself is dropped
# from the extracted strings.
#
# An opening quote that is never closed yields undef in its slot.  A closing
# quote with no matching opener is treated as ordinary text.
#
# Dies when $quotchars is not exactly two characters long.
#
# Implementation note: this is a single left-to-right scan with a stack of
# open quotes.  It needs no placeholder text inside the string, so input
# containing NUL bytes, high-bit bytes or "${QL0}"-like text is handled, and
# no negative array subscripts are used (in Perl 5 those index from the end
# of the array).
sub pull_quotes {
    my ($text, $qchars) = @_;

    die "need just two quote chars"
        if !defined($qchars) || length($qchars) != 2;

    my $qL = substr($qchars, 0, 1);     # left quote char
    my $qR = substr($qchars, 1, 1);     # right quote char
    $text = '' unless defined $text;

    my @quotes;         # result slots, one per opening quote, in order seen
    my @open;           # stack of [slot, start offset in $plain] still open
    my $plain = '';     # text scanned so far, with backslash escapes resolved

    while ($text =~ /\G(?:\\([^\n])|(.))/gs) {
        if (defined $1) {
            # Escaped character: keep it literally, never as a quote.
            $plain .= $1;
            next;
        }

        my $ch = $2;
        if ($ch eq $qL) {
            push @quotes, undef;
            # The content starts just after the quote char appended below.
            push @open, [ $#quotes, length($plain) + 1 ];
        }
        elsif ($ch eq $qR && @open) {
            my ($slot, $start) = @{ pop @open };
            $quotes[$slot] = substr($plain, $start);
        }
        $plain .= $ch;
    }

    print "unescaped string is $plain\n" if $main::debug;

    return @quotes;
}
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################
# MAIN STARTS HERE
#
# Demonstration driver: feeds sample strings of increasing complexity to
# show_quotes(), first with ` and ' as the quote pair, then with ( and ).
#
# NOTE(review): the line breaks and indentation inside the two here-documents
# below were lost when this listing was flattened onto running lines.  They
# have been re-inserted at statement boundaries and may not match the original
# posting; the sequence of tokens is unchanged.

&show_quotes("This has `one' quote", "`'");

&show_quotes(q(Like this: `Jim' and `Bill said, ``Boo'' to me' to the end.), "`'");

&show_quotes("Mom said, `Jim asked ``How come?'', so I told her ``because''.'", "`'");

# q() turns each \\ into a single backslash, so the apostrophes in "Don't"
# and "Jimmy's" reach the function backslash-protected.
&show_quotes(q(Mom said, `Don\\'t put Jimmy\\'s `widjet' down the drain!'), "`'");

&show_quotes('a = (b + c) / 2', '()');

&show_quotes('a = (b + (c*d)) / (7+(2/3))', '()');

&show_quotes('value = (double)(U_L(value) & U_L(str_gnum(st[2])));', '()');

&show_quotes(<<'EOQ', "()");
    case 'I':
        str_cat(str,"-");
        str_cat(str,s);
        str_cat(str," ");
        if (*++s) {
            (void)apush((((STBP*)(incstab->str_ptr))->stbp_array ? ((STBP*)(incstab->str_ptr))->stbp_array : ((STBP*)(aadd(incstab)->str_ptr))->stbp_array),str_make(s,0));
        }
        else if (argv[1]) {
            (void)apush((((STBP*)(incstab->str_ptr))->stbp_array ? ((STBP*)(incstab->str_ptr))->stbp_array : ((STBP*)(aadd(incstab)->str_ptr))->stbp_array),str_make(argv[1],0));
            str_cat(str,argv[1]);
            argc--,argv++;
            str_cat(str," ");
        }
EOQ

&show_quotes(<<'EOW', '()');
(defun define-abbrevs (&optional arg)
  "Define abbrevs according to current visible buffer contents.
See documentation of edit-abbrevs for info on the format of the
text you must have in the buffer.
With argument, eliminate all abbrev definitions except
the ones defined from the buffer now."
  (interactive "P")
  (if arg (kill-all-abbrevs))
  (save-excursion
   (goto-char (point-min))
   (while (and (not (eobp)) (re-search-forward "^\(" nil t))
     (let* ((buf (current-buffer))
            (table (read buf))
            abbrevs)
       (forward-line 1)
       (while (progn (forward-line 1)
                     (not (eolp)))
         (setq name (read buf) count (read buf) exp (read buf))
         (skip-chars-backward " \t\n\f")
         (setq hook (if (not (eolp)) (read buf)))
         (skip-chars-backward " \t\n\f")
         (setq abbrevs (cons (list name exp hook count) abbrevs)))
       (define-abbrev-table table abbrevs)))))
EOW

exit 0;