Path: utzoo!utgpu!jarvis.csri.toronto.edu!mailrus!uwm.edu!gem.mps.ohio-state.edu!tut.cis.ohio-state.edu!purdue!bu-cs!bloom-beacon!eru!luth!sunic!tut!tukki!tarvaine From: tarvaine@tukki.jyu.fi (Tapani Tarvainen) Newsgroups: gnu.utils.bug Subject: more sed fixes Summary: strange regexps, 'a' and 'r' improved Message-ID: <1520@tukki.jyu.fi> Date: 16 Oct 89 19:30:36 GMT Reply-To: tarvaine@tukki.jyu.fi (Tapani Tarvainen) Organization: University of Jyvaskyla, Finland Lines: 300 (1) '\n' didn't work in context addresses (yes, I have a real script that needs it). This was easy to fix (effectively copied code from 's'). (2) Delimiters that normally mustn't be preceded by \ didn't work properly, e.g., 'sw\wordwouchw' would become 's/\word/ouch/' instead of 's/word/ouch/' as it should. (This is rather unlikely to be of any consequence, but I fixed it anyway - it wasn't hard.) (3) The 'a' and 'r' commands worked by copying the text to be appended to a buffer in memory - one runs out of memory easily that way, especially with 'r', and it wasted time, too. I rewrote their handling so that when they're encountered during execution a pointer is added to a list which is then processed at the end. Now you can read an arbitrarily big file with 'r' without difficulties. Below is a context diff from my previous version. If you want diffs from the 1.06 distribution or the entire sed.c as it now stands (not much longer than the diff by now) just send me mail. *** sed.old Mon Oct 16 20:56:24 1989 --- sed.c Mon Oct 16 21:27:45 1989 *************** *** 1,7 **** /* GNU SED, a batch stream editor. Copyright (C) 1989, Free Software Foundation, Inc. ! Last changed by Tapani Tarvainen 13 October 1989 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by --- 1,7 ---- /* GNU SED, a batch stream editor. Copyright (C) 1989, Free Software Foundation, Inc. !
Last changed by Tapani Tarvainen 16 October 1989 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by *************** *** 49,69 **** - fixed a bug in append_pattern_space (moved n==0 test to beginning of loop) - fixed 's/any//' so it won't do ck_malloc(0) ! - fixed 'r' reallocation of append buffer ! - changed 'a' reallocation: double append buffer when it overflows ! instead of adding just enough for the new line - fixed handling of comments at end of file - changed error and usage messages slightly (print usage if no arguments, remove path from name in messages) - moved some ANSI-routine replacements inside #ifndef __STDC__ (introduced ck_strdup) ! - added ANSI-style prorotypes for everything (so this compiles OK even when sizeof(int) != sizeof(pointer) ...) - added \x delimiter syntax for context addresses (e.g., '\-string-p' works like '/string/p') */ - #ifdef MSDOS /* MicroSoft C 5.1 defines this */ #define __MSDOS__ #endif --- 49,69 ---- - fixed a bug in append_pattern_space (moved n==0 test to beginning of loop) - fixed 's/any//' so it won't do ck_malloc(0) ! - rewrote 'a' and 'r' handling so that they no longer need ! to store an extra copy of the string/file to be appended ! - fixed \n behaviour in context addresses ! - fixed \delimiter behaviour with odd delimiters - fixed handling of comments at end of file - changed error and usage messages slightly (print usage if no arguments, remove path from name in messages) - moved some ANSI-routine replacements inside #ifndef __STDC__ (introduced ck_strdup) ! - added ANSI-style prototypes for everything (so this compiles OK even when sizeof(int) != sizeof(pointer) ...) 
- added \x delimiter syntax for context addresses (e.g., '\-string-p' works like '/string/p') */ #ifdef MSDOS /* MicroSoft C 5.1 defines this */ #define __MSDOS__ #endif *************** *** 312,319 **** /* An input line that's been stored by later use by the program */ struct line hold; ! /* A 'line' to append to the current line when it comes time to write it out */ ! struct line append; /* When we're reading a script command from a string, 'prog_start' and 'prog_end' point --- 312,320 ---- /* An input line that's been stored by later use by the program */ struct line hold; ! /* list of 'a' and 'r' commands to be executed at the end */ ! struct sed_cmd **append_list; ! unsigned append_alloc, append_used; /* When we're reading a script command from a string, 'prog_start' and 'prog_end' point *************** *** 424,432 **** line.alloc=50; line.text=ck_malloc(50); ! append.length=0; ! append.alloc=50; ! append.text=ck_malloc(50); hold.length=0; hold.alloc=50; --- 425,433 ---- line.alloc=50; line.text=ck_malloc(50); ! append_alloc = 10; ! append_list = ck_malloc(append_alloc * sizeof(struct sed_cmd *)); ! append_used = 0; hold.length=0; hold.alloc=50; *************** *** 681,687 **** case EOF: break; default: ! add1_buffer(b,'\\'); add1_buffer(b,ch); break; } --- 682,689 ---- case EOF: break; default: ! if (ch != slash) ! add1_buffer(b,'\\'); add1_buffer(b,ch); break; } *************** *** 868,876 **** { int ch; int num; char *b; VOID *init_buffer(); - char slash = '/'; ch=inchar(); --- 870,878 ---- { int ch; int num; + int slash; char *b; VOID *init_buffer(); ch=inchar(); *************** *** 884,898 **** addr->addr_type=ADDR_NUM; addr->addr_number = num; return 1; ! } else if(ch=='/' || ch=='\\' && (slash = inchar()) != EOF) { addr->addr_type=ADDR_REGEX; b=init_buffer(); while((ch=inchar())!=EOF && ch!=slash) { add1_buffer(b,ch); if(ch=='\\') { ! ch=inchar(); ! 
if(ch!=EOF) add1_buffer(b,ch); } } if(size_buffer(b)) { --- 886,909 ---- addr->addr_type=ADDR_NUM; addr->addr_number = num; return 1; ! } else if(ch=='/' || ch=='\\' && (ch = inchar()) != EOF) { ! slash = ch; addr->addr_type=ADDR_REGEX; b=init_buffer(); while((ch=inchar())!=EOF && ch!=slash) { add1_buffer(b,ch); if(ch=='\\') { ! switch(ch=inchar()) { ! case 'n': ! add1_buffer(b,'\n'); ! break; ! default: ! if (ch != slash) ! add1_buffer(b,'\\'); add1_buffer(b,ch); + case EOF: + break; + } } } if(size_buffer(b)) { *************** *** 1030,1038 **** execute_program(the_program); if(!no_default_output) ck_fwrite(line.text,1,line.length,stdout); ! if(append.length) { ! ck_fwrite(append.text,1,append.length,stdout); ! append.length=0; } if(quit_cmd) break; --- 1041,1063 ---- execute_program(the_program); if(!no_default_output) ck_fwrite(line.text,1,line.length,stdout); ! if (append_used) { ! struct sed_cmd *cur_cmd; ! int n, i; ! char tmp_buf[1024]; ! for (i = 0; i < append_used; i++) { ! cur_cmd = append_list[i]; ! if (cur_cmd->cmd == 'a') { ! ck_fwrite(cur_cmd->x.cmd_txt.text,1, ! cur_cmd->x.cmd_txt.text_len,stdout); ! } else { /* 'r' */ ! rewind(cur_cmd->x.io_file); ! while((n=fread(tmp_buf,sizeof(char),1024,cur_cmd->x.io_file))>0) ! ck_fwrite(tmp_buf,1,n,stdout); ! if(ferror(cur_cmd->x.io_file)) ! panic("Read error on input file to 'r' command\n"); ! } ! } } if(quit_cmd) break; *************** *** 1095,1106 **** break; case 'a': ! while(append.alloc-append.length<cur_cmd->x.cmd_txt.text_len) { ! append.alloc *= 2; ! append.text=ck_realloc(append.text,append.alloc); } ! bcopy(cur_cmd->x.cmd_txt.text,append.text+append.length,cur_cmd->x.cmd_txt.text_len); ! append.length+=cur_cmd->x.cmd_txt.text_len; break; case 'b': --- 1120,1132 ---- break; case 'a': ! case 'r': ! if (append_used == append_alloc) { ! append_alloc += 10; ! append_list = ck_realloc(append_list, ! append_alloc * sizeof(struct sed_cmd *)); } !
append_list[append_used++] = cur_cmd; break; case 'b': *************** *** 1249,1271 **** case 'q': quit: quit_cmd++; end_cycle++; - break; - - case 'r': - { - int n=0; - - rewind(cur_cmd->x.io_file); - do { - append.length += n; - if(append.length==append.alloc) { - append.alloc *= 2; - append.text = ck_realloc(append.text, append.alloc); - } - } while((n=fread(append.text+append.length,sizeof(char),append.alloc-append.length,cur_cmd->x.io_file))>0); - if(ferror(cur_cmd->x.io_file)) - panic("Read error on input file to 'r' command\n"); - } break; case 's': --- 1275,1280 ---- -- Tapani Tarvainen (tarvaine@tukki.jyu.fi, tarvainen@finjyu.bitnet)