Xref: utzoo news.software.b:6632 alt.sources:3016
Path: utzoo!utgpu!cs.utexas.edu!uunet!mcsun!ukc!tcdcs!dce.ie!em
From: em@dce.ie (Eamonn McManus)
Newsgroups: news.software.b,alt.sources
Subject: Re: unbatcher out of sync?
Message-ID:
Date: 15 Jan 91 23:34:28 GMT
References: <1991Jan11.001342.6553@micor.OCUnix.On.Ca> <1991Jan11.225132.17555@zoo.toronto.edu>
Followup-To: news.software.b
Organization: Datacode Communications Ltd, Dublin, Ireland
Lines: 188

henry@zoo.toronto.edu (Henry Spencer) writes:
>It means "something's wrong with your batch": relaynews did not find a
>"#! rnews nnnnn" line where one should have been. Typically this means
>garbling during preparation or transmission. One notorious trouble spot
>is that the batch format cannot tolerate transformations of newlines to
>CR-LF pairs; the byte counts in the "#! rnews" lines must be spot-on.

We had `unbatcher out of sync' problems at a site I was involved in,
which was fed its news by mail from a VMS site (ugh). The VMS mailer
(PMDF) got confused when lines exceeded 256 characters, as References
lines often do, and would make a total hash of the header when this
happened. As a result, the "#! rnews" count would always be off by a
small amount for the affected article. C News resyncs at the next
"#! rnews" line, but if the count is too long for the actual article
contents it will have missed the start of the article following the
garbled one.

To kludge around this problem I wrote a program `patchbatch' which zips
through a news batch looking for "#! rnews" lines with incorrect counts.
If it finds one, it hunts back and forth a small amount for the next
"#! rnews" line and adjusts the incorrect one to point to it. This was
surprisingly effective: while it was running I believe it never failed
to correct a munged batch.

I'm including the source of patchbatch in case it is of use to the
original poster, or anyone else.

,
Eamonn

/* patchbatch.c - patch a news batch. */
/* By Eamonn McManus, February 1990.
* This program is not copyrighted. * * Blast through a news batch checking the offsets after `#! rnews'. * If we find that the offset does not lead to another `#! rnews' line * or EOF, we search around for the line somewhere in the vicinity. If * it is found, we go back and patch the original offset to point to the * correct place. This is useful for example on systems where long lines * get truncated or split in transmission, since in this case the stated * offset will be wrong. * * This is the hackiest program I have written in a long time. */ #include #include #include /* For O_RDWR. */ #include #include extern long strtol(); char verbose; extern int optind; main(argc, argv) char **argv; { int i, status; while ((i = getopt(argc, argv, "v")) != -1) switch (i) { case 'v': verbose = 1; break; default: goto usage; } if (optind == argc) { usage: fprintf(stderr, "Usage: patchbatch file [...]\n"); exit(2); } status = 0; for (i = optind; i < argc; i++) if (patchbatch(argv[i]) < 0) status = 1; exit(status); } static char lead[] = "#! 
rnews "; #define LEADLEN (sizeof lead - 1) #define FUDGE (2 * sizeof lead) int patchbatch(name) char *name; { int fd, i; long here, offset; char buf[64]; struct stat st; if ((fd = open(name, O_RDWR)) < 0) { perror(name); return -1; } if (fstat(fd, &st) < 0) { perror(name); return -1; } if ((i = read(fd, buf, sizeof buf - 1)) != sizeof buf - 1) { if (i < 0) perror(name); else fprintf(stderr, "%s: too short for a news batch\n"); close(fd); return -1; } buf[sizeof buf - 1] = '\0'; if (strncmp(buf, lead, LEADLEN) != 0) { fprintf(stderr, "%s: not a news batch (should start with %s)\n", name, lead); close(fd); return -1; } here = 0; i = 0; while (1) { char *p; int numsize; long artstart, newpos; offset = strtol(buf + LEADLEN, &p, 10); if (offset == 0) { fprintf(stderr, "%s: bad value after %s, file offset %ld\n", name, lead, here); close(fd); return -1; } numsize = p - (buf + LEADLEN); artstart = here + LEADLEN + numsize + 1/*\n*/; newpos = artstart + offset; if (newpos == st.st_size) return 0; else if (newpos > st.st_size) { char offstr[16]; lastart: offset = st.st_size - artstart; changeoffset: sprintf(offstr + 1, "%ld", offset); switch (strlen(offstr + 1) - numsize) { case 0: /* Same size, just overwrite. */ p = offstr + 1; break; case -1: /* Shorter, use leading 0. */ p = offstr; *p = '0'; break; case 1: /* Longer, oops. */ fprintf(stderr, "%s: no room to change article \ length to %ld, file offset %ld\n", name, offset, here); goto setnewpos; } lseek(fd, here + LEADLEN, 0); if (write(fd, p, numsize) < 0) { perror(name); return -1; } if (verbose) fprintf(stderr, "%s: changed article length to \ %ld, file offset %ld\n", name, offset, here); setnewpos: newpos = artstart + offset; if (newpos >= st.st_size) return 0; } else { /* newpos < st.st_size */ lseek(fd, newpos - FUDGE, 0); if (read(fd, buf, sizeof buf - 1) < sizeof buf - 1) goto lastart; if (strncmp(buf + FUDGE, lead, LEADLEN) == 0) { strcpy(buf, buf + FUDGE); /* Hmmm... 
*/ here = newpos; continue; } for (p = buf; (p = strchr(p, lead[0])) != NULL; p++) if (strncmp(p, lead, LEADLEN) == 0) break; if (p == NULL) { fprintf(stderr, "%s: can't find next article \ with offset %ld from file pos %ld\n", name, offset, here); close(fd); return -1; } offset = (newpos - FUDGE) + (p - buf) - artstart; goto changeoffset; } lseek(fd, newpos, 0); if (read(fd, buf, sizeof buf - 1) < sizeof buf - 1) { fprintf(stderr, "%s: last article too short\n", name); close(fd); return -1; } here = newpos; } }