Path: utzoo!utgpu!news-server.csri.toronto.edu!rpi!usc!wuarchive!rex!uflorida!travis!brad
From: brad@SSD.CSD.HARRIS.COM (Brad Appleton)
Newsgroups: comp.unix.programmer
Subject: Re: Extracting documentation from C code.
Summary: Here is a ksh script to do it
Message-ID: <3416@travis.csd.harris.com>
Date: 19 May 91 17:02:48 GMT
References: <9105180910.AA21944@ucbvax.Berkeley.EDU>
Sender: news@travis.csd.harris.com
Organization: Harris Computers Systems Division, Fort Lauderdale,FL
Lines: 504

I originally had replied only to the original poster but there seems to be
enough interest that I thought I'd reply here.

I have written a ksh script that uses nawk to implement just such a beast
(I'm working on a replacement written in perl). You can embed your
documentation in ANY type of source file you wish (C, C++, Pascal, etc).

Here is the script and a sample C source file for those of you that are
interested. Let me know of any comments you might have.

______________________ "And miles to go before I sleep." ______________________
 Brad Appleton           brad@ssd.csd.harris.com   Harris Computer Systems
                             uunet!hcx1!brad           Fort Lauderdale, FL USA
~~~~~~~~~~~~~~~~~~~~ Disclaimer: I said it, not my company! ~~~~~~~~~~~~~~~~~~~

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file".
#
# NOTE: the extraction stanza below was lost from this copy of the posting
# (everything between "sh <file" and the redirection was stripped); it has
# been rebuilt to mirror the strsplit.c stanza further down, which is what
# the two closing "fi" lines after the size test belong to.
if test -f 'xdoc.ksh' -a "${1}" != "-c" ; then
  echo shar: Will not clobber existing file \"'xdoc.ksh'\"
else
echo shar: Extracting \"'xdoc.ksh'\" \(6282 characters\)
sed "s/^X//" >'xdoc.ksh' <<'END_OF_FILE'
X#!/bin/ksh
X#
X# xdoc - eXtract DOCumentation from structured C-Comments
X#
X# Documentation is marked inside ordinary comments with caret keywords:
X#   ^FILE: / ^FUNCTION: / ^<TYPE>:   start a section
X#   ^SUBSECTION-NAME:                start a subsection of it
X#   ^^                               end the section
X#
X# Created by Brad Appleton
X
XNAME="`basename $0`"
X
XSYNOPSIS="\
X$NAME [-n] [-f function] [-i string] [-p pattern] [-mname] [-s section]
X     [-t type=name] [-I subsection(s)] [-X subsection(s)]
X     [file ...]\
X"
X
XDESCRIPTION="\
X$NAME will extract the documentation associated with the named section from
Xthe given files. If no section is given, then FILE is assumed.\
X"
X
XOPTIONS="\
X-n              Dont print section title
X-f function     Extract text for the given function
X-i string       Indent text using the given string (default=3-spaces)
X-p pattern      Specify the pattern to trim of the beginning of each line
X                (default=\"[ \\t]\")
X-m name         Use the {t,n}roff -mname macros to format the ouput
X-s section      Extract text for the named section
X-t type=name    Extract text for the named type
X-I subsections  Specify which subsection(s) of the named section are to be
X                included in the output. If multiple subsections are desired
X                then the list must be placed in a single command-line argument.
X                By default, all subsections are printed.
X-X subsections  Specify which subsection(s) of the named section are to be
X                excluded from the output. If multiple subsections are desired
X                then the list must be placed in a single command-line argument.
X                By default, no subsections are excluded.
X"
X
Xset +o nounset
X
X## print the usage text on stderr and exit with status $1 (default: 2)
Xfunction print_usage {
X  print -u2 "\nUsage: ${SYNOPSIS}"
X  if [ "$OPTIONS" ] ; then
X    print -u2 "\n${OPTIONS}"
X  fi
X  if [ "$DESCRIPTION" ] ; then
X    print -u2 "\n${DESCRIPTION}"
X  fi
X  print
X    ## was ${1-:2}, which expands to the string ":2" when no status is given
X  exit ${1:-2}
X}
X
Xalias warn_user="print -u2 '** '"
X
X## print a message on stderr, prefixed with the program name
Xfunction error_msg {
X  print -u2 "${NAME}: $*"
X}
X
X## like error_msg, but also terminate with exit status 2
Xfunction fatal_msg {
X  print -u2 "${NAME}: $*"
X  exit 2
X}
X
X## default pattern trimmed from the front of each extracted line: a single
X## space or tab (the "\t" is turned into a tab by the awk string literal
X## that this value is pasted into below)
XSTARTPAT='^[ \t]'
XKWD='FILE';  isFUNC=0;  isSECTION=0;  isTYPE=0;  NOHEADINGS=0;
XINCLUDES='.*';  EXCLUDES='';
X
X## trim all leading and trailing whitespace, and compress whitespace
Xfunction tidylist {
X  print "$*" | sed -e 's/^[[:space:]]*//' \
X                   -e 's/[[:space:]]*$//' \
X                   -e 's/[[:space:]][[:space:]]*/ /g'
X}
X
X## parse options ("n" must be in the option string or -n is rejected)
Xwhile getopts ':nf:m:s:t:i:p:I:X:' OPT
X  do case "$OPT" in
X    n) NOTITLE='TRUE';;
X    f) KWD='FUNCTION'; IDENT="$OPTARG" ; isFUNC=1;;
X    s) KWD='SECTION'; IDENT="$OPTARG"; isSECTION=1 ;;
X    t) KWD="$(print ${OPTARG%=*} | tr '[a-z]' '[A-Z]')"; IDENT="${OPTARG#*=}";
X       isTYPE=1 ;;
X    i) INDENT="${OPTARG}" ;;
X    p) STARTPAT="^${OPTARG#\^}" ;;
X    m) MACROS="-m${OPTARG}" ;;
X    I) INCLUDES="${OPTARG}" ;;
X    X) EXCLUDES="${OPTARG}" ;;
X    :) error_msg "$OPTARG requires a value"; print_usage ;;
X    \?) error_msg "unknown option $OPTARG"; print_usage ;;
X  esac
Xdone
Xshift OPTIND-1
X
X## no files named and nothing redirected into stdin: there is no input to
X## read, so show the usage (the test used to look at stdout, fd 1)
Xif [ $# -eq 0 -a -t 0 ] ; then
X  print_usage;
X  exit 2;
Xfi
X
Xtest $isFUNC -ne 0 -a $isSECTION -ne 0 && badopts='TRUE'
Xtest $isFUNC -ne 0 -a $isTYPE -ne 0 && badopts='TRUE'
Xtest $isTYPE -ne 0 -a $isSECTION -ne 0 && badopts='TRUE'
Xif [ "$badopts" ] ; then
X  fatal_msg "only one of -f, -s, and -t may be used"
Xfi
X
X## awk regular expression that recognizes the start of the wanted section
XPATTERN="\^${KWD}:${IDENT:+[ \t]*${IDENT}[^A-Z0-9]*}"
X
X## trim and compress all spaces and tabs in subsection lists
XINCLUDES="$( tidylist $INCLUDES )"
XEXCLUDES="$( tidylist $EXCLUDES )"
X
X## with -n the title counts as already printed, which suppresses it
Xif [ "$NOTITLE" ] ; then
X  ENTITLED='1'
Xelse
X  ENTITLED='0'
Xfi
X
X## NOTE: the awk program is one single-quoted shell word with shell values
X## spliced in, so it must not contain an apostrophe anywhere.
Xawk '
X  BEGIN {
X    processing=0; ignore=0; entitled='$ENTITLED'; indent="'"${INDENT:-   }"'";
X    keyword="'"$KWD"'"; section='$isSECTION'; macros="'"${MACROS:-}"'";
X    startpat="'"$STARTPAT"'";
X    nincl = split( "'"$INCLUDES"'", includes, " " );
X    nexcl = split( "'"$EXCLUDES"'", excludes, " " );
X  }
X
X    ## return 1 if the named subsection is to be printed, 0 otherwise
X    ## (exclusions win over inclusions; i is a local variable)
X  function is_needed(subsection,    i) {
X      ## first see if it is excluded
X    for ( i = 1 ; i <= nexcl ; i++ )
X      if ( match(subsection, excludes[i]) )  return 0;  ## not-needed
X
X      ## now see if it is included
X    for ( i = 1 ; i <= nincl ; i++ )
X      if ( match(subsection, includes[i]) )  return 1;  ## needed
X
X    return 0;  ## not-needed
X  }
X
X    ## strip the comment delimiters from a line, choosing the comment
X    ## syntax by the suffix of the file name (text is a local variable)
X  function uncomment(filename, textline,    text) {
X    text = textline;
X    if ( filename ~ /\.[CHchly]$/ ) {  ## C and C++ comments
X      gsub( /\/\//, "", text );
X      gsub( /\/\*/, "", text );
X      gsub( /\*\//, "", text );
X      sub( /^[ \t]*\*\**/, "", text );
X    }
X    else {  ## assume sh, csh, or ksh comments
X      sub( /^[ \t]*##*/, "", text );
X    }
X
X    return text;
X  }
X
X    ## print a section heading in the selected macro package (or as plain
X    ## text), followed by a "name - purpose" line when name is non-empty
X  function print_heading( level, heading, name, purpose ) {
X    if ( macros == "-man" )
X      printf( ".SH \"%s\"\n", heading );
X    else if ( macros == "-me" )
X      printf( ".sh %d \"%s\"\n", level, heading );
X    else if ( macros == "-mm" )
X      printf( ".H %d \"%s\"\n", level, heading );
X    else
X      printf( "%s:\n", heading );
X
X    if ( name != "" ) {
X      if ( macros != "" )
X        printf( "%s \\- %s\n", name, purpose );
X      else
X        printf( "%s%s -- %s\n", indent, name, purpose );
X    }
X  }
X
X    ## begin a paragraph: a macro request, or else just print str
X  function start_paragraph( str ) {
X    if ( macros == "-man" )
X      printf( ".PP\n" );
X    else if ( macros == "-me" )
X      printf( ".pp\n" );
X    else if ( macros == "-mm" )
X      printf( ".P\n" );
X    else
X      printf( "%s", str );
X  }
X
X    ## "^^" ends the section that is being extracted
X  /\^\^/ {
X    processing=0; ignore=0; next;
X  }
X
X    ## start of the wanted section
X  /'"$PATTERN"'/ {
X    ++processing; purpose=$0; name="'"$IDENT"'";
X    if ( name == "" )  name=FILENAME;
X    re = sprintf( "^.*:[ \t]*%s[-:=# \t]*", name );
X    if ( re != "" )  sub( re, "", purpose );
X    if ( section ) {
X      if ( !entitled ) {
X        ++entitled;
X        print_heading( 1, name, "", "" );
X        start_paragraph( "" );
X      }
X      else
X        start_paragraph( "\n" );
X    }
X    else {
X      if ( !entitled ) {
X        ++entitled;
X        print_heading( 1, keyword, name, purpose );
X      }
X    }
X    next;
X  }
X
X    ## start of a subsection: print its heading or begin ignoring it
X  /\^[A-Z][-_A-Z0-9]*:/ {
X    if ( !processing )  next;
X    title=$0;
X    sub( "^.*\\^", "", title );
X    sub( ":.*$", "", title );
X    gsub( "[-_]", " ", title );
X    if ( is_needed(title) ) {
X      ignore=0;
X      print_heading( 2, title, "", "" );
X      start_paragraph( "" );
X    }
X    else {
X      ++ignore;
X    }
X    next;
X  }
X
X    ## ordinary text line inside a wanted (sub)section
X  {
X    if ( !processing )  next;
X    if ( ignore )  next;
X    line = uncomment(FILENAME, $0);
X    if ( startpat != "" )  sub( startpat, "", line );
X    if ( macros == "" ) {
X      printf( "%s", indent );
X    }
X    print line;
X  }
X' "$@"
X
END_OF_FILE
# NOTE: 6282 is the size of xdoc.ksh as originally posted; the text above has
# since been repaired, so a "wrong size" warning here is expected.
if test 6282 -ne `wc -c <'xdoc.ksh'`; then
    echo shar: \"'xdoc.ksh'\" unpacked with wrong size!
fi
chmod +x 'xdoc.ksh'
# end of 'xdoc.ksh'
fi
if test -f 'strsplit.c' -a "${1}" != "-c" ; then
  echo shar: Will not clobber existing file \"'strsplit.c'\"
else
echo shar: Extracting \"'strsplit.c'\" \(6901 characters\)
sed "s/^X//" >'strsplit.c' <<'END_OF_FILE'
X/**************************************************************************
X** ^FILE: strsplit.c - split and join strings
X**
X** ^DESCRIPTION:
X**    This file implements the following functions:
X**
X**       strsplit() -- split a string up into a vector of tokens
X**       strjoin()  -- join a vector of tokens into a single string
X**
X**    Compile with __ANSI_C__ defined to get prototype-style definitions;
X**    otherwise old-style (K&R) definitions are used.
X**
X** ^HISTORY:
X**    01/02/91 	Brad Appleton 	 Created
X***^^**********************************************************************/
X
X#include <stdio.h>
X#include <stdlib.h>
X#include <string.h>
X
X   /* "const" only exists for ANSI compilers */
X#ifndef CONST
X#  ifdef __ANSI_C__
X#     define CONST const
X#  else
X#     define CONST
X#  endif
X#endif
X
X#define CHARNULL  ((char *)NULL)
Xstatic CONST char WhiteSpace[] = " \t\n\r\v\f";
X
X
X/***************************************************************************
X** ^FUNCTION: strsplit - split a string into tokens
X**
X** ^SYNOPSIS:
X*/
X#ifndef __ANSI_C__
X   int  strsplit( vec, token_str, separators )
X/*
X** ^PARAMETERS:
X*/
X   char **vec[];
X/*    -- pointer to the string vector to be allocated
X*/
X   char token_str[];
X/*    -- the string to be split up
X*/
X   char separators[];
X/*    -- the delimiters that separate tokens
X*/
X#endif  /* !__ANSI_C__ */
X
X/* ^DESCRIPTION:
X**    Strsplit will split token_str up into a vector of tokens that are
X**    separated by one or more characters from <separators>. The number
X**    of tokens found is returned and storage is allocated for the given
X**    vector (which may later be deallocated using free()).
X**
X**    If <separators> is NULL or empty, then the set of whitespace characters
X**    is used as the token delimiters.
X**
X** ^REQUIREMENTS:
X**    vec must be non-NULL (it must be a valid address).
X**    token_str should be non-null and non-empty
X**
X** ^SIDE-EFFECTS:
X**    Leading and trailing characters from <separators> are not part of any
X**    token. Furthermore, all remaining sequences in token_str of characters
X**    from <separators> are replaced with a single NUL-byte (token_str is
X**    modified in place).
X**
X**    Token_str holds the actual storage for all the strings in the newly
X**    created vector.
X**
X**    If token_str is NULL, 0 is returned and *vec is left untouched.
X**    If the vector cannot be allocated, a message is written to stderr
X**    and the program exits.
X**
X** ^RETURN-VALUE:
X**    The number of tokens parsed.
X**
X** ^ALGORITHM:
X**    - count the number of tokens present while at the same time removing
X**      all leading and trailing delimiters, and replacing all other sequences
X**      of delimiters with the NUL character.
X**    - allocate a vector large enough to point to all the token strings.
X**    - for i in 0 .. (numtokens - 1) do
X**         - vector[i] = token_str
X**         - advance token_str to point at the next character past the
X**           rightmost NUL-byte (which should be the start of the next token).
X**      end-for
X**    - return the number of tokens parsed.
X***^^**********************************************************************/
X#ifdef __ANSI_C__
X   int strsplit( char **vec[], char token_str[], const char separators[] )
X#endif
X{
X   char  c, *pread, *pwrite;
X   int   i, count = 0;
X
X   if ( !token_str )  return  0;
X      /* if delim-string is NULL or empty, whitespace is used */
X   if ( !separators  ||  !*separators )  separators = WhiteSpace;
X
X      /*
X      ** trim leading separators.  The NUL test must come first: strchr()
X      ** treats the terminating NUL of <separators> as part of the string,
X      ** so strchr(separators, '\0') is never NULL and the scan would
X      ** otherwise run off the end of token_str.
X      */
X   pread = token_str;
X   while ( *pread  &&  strchr(separators, *pread) )  ++pread;
X   token_str = pwrite = pread;
X
X      /*
X      ** make first pass through string, counting # of tokens and
X      ** separating all tokens by a single '\0'
X      */
X   while ( (c = *pread++) != '\0' ) {
X      if ( !strchr(separators, c) ) {
X         *pwrite++ = c;
X      }
X      else {
X         *pwrite++ = '\0';   /* null terminate this token */
X         ++count;            /* update token count */
X         while ( *pread  &&  strchr(separators, *pread) )  ++pread;
X      }
X   }/*while*/
X
X      /*
X      ** the last token has no separator after it.  Only look at the
X      ** previous byte if something was written, otherwise we would read
X      ** in front of the first token (empty or all-separator input).
X      */
X   if ( pwrite > token_str  &&  *(pwrite - 1) ) {
X      ++count;         /* dont forget last token */
X      *pwrite = '\0';  /* null-terminate */
X   }
X
X      /* allocate space for the caller's vector (remember NULL at the end) */
X   (*vec) = (char **)malloc( (1 + count) * sizeof( char * ) );
X   if ( !*vec ) {
X      fprintf( stderr, "out of memory in strsplit() - aborting\n" );
X      exit( -1 );
X   }
X
X      /* now go thru token-string again assigning pointers from vector */
X   pread = token_str;
X   for ( i = 0 ; i < count ; i++ ) {
X      (*vec)[i] = pread;   /* assign pointer */
X      pread += strlen( pread ) + 1;
X   }/* end-for */
X
X      /* set up the trailing pointer to NULL at the end */
X   (*vec)[ count ] = CHARNULL;
X   return  count;
X}
X
X
X/***************************************************************************
X** ^FUNCTION: strjoin - join a vector of tokens together
X**
X** ^SYNOPSIS:
X*/
X#ifndef __ANSI_C__
X   char  *strjoin( argv, separator )
X/*
X** ^PARAMETERS:
X*/
X   char *argv[];
X/*    -- pointer to the string vector to join together
X*/
X   char separator[];
X/*    -- the string to use to separate tokens (if NULL, " " is used)
X*/
X#endif  /* !__ANSI_C__ */
X
X/* ^DESCRIPTION:
X**    Strjoin will make a single string out of the given vector by copying
X**    all the tokens from the given vector (in order) to a newly allocated
X**    string. Tokens will be separated by a single occurrence of <separator>.
X**    Empty tokens are skipped and contribute no separator.
X**
X**    If <separator> is NULL then a single space is used as the separator.
X**    If <separator> is empty, then no separator is used and the tokens are
X**    simply concatenated together.
X**
X** ^REQUIREMENTS:
X**    argv must be terminated by a pointer to NULL (argv[last+1] == NULL).
X**
X** ^SIDE-EFFECTS:
X**    Storage is allocated.  If the allocation fails, a message is written
X**    to stderr and the program exits.
X**
X** ^RETURN-VALUE:
X**    The address of the newly-joined result (which should be deallocated
X**    using free()). Returns NULL if argv is NULL; if argv holds no
X**    non-empty tokens the result is an empty string.
X**
X** ^ALGORITHM:
X**    - count the number of characters to place in the joined-result.
X**    - allocate a string large-enough to copy the joined-result into.
X**    - copy each string into the result (with <separator> between tokens).
X**    - return the result.
X***^^**********************************************************************/
X#ifdef __ANSI_C__
X   char *strjoin( const char *argv[], const char separator[] )
X#endif
X{
X   size_t  sz = 0, seplen;
X   int     joined;          /* # of tokens accounted for so far */
X   char   *p, *result;
X   CONST char  *a, *s, **av;
X
X      /* if argv is NULL, nothing to do */
X   if ( !argv )  return  CHARNULL;
X   if ( !separator )  separator = " ";
X   seplen = strlen( separator );
X
X      /*
X      ** figure out how much space we need: a separator goes in front of
X      ** every non-empty token except the first, so that empty tokens at
X      ** the end of the vector do not leave a dangling separator.
X      */
X   joined = 0;
X   for ( av = argv ; *av ; av++ ) {
X      if ( !**av )  continue;
X      if ( joined++ )  sz += seplen;
X      sz += strlen( *av );
X   }
X
X      /* allocate space */
X   result = (char *)malloc( sz + 1 );
X   if ( !result ) {
X      fprintf( stderr, "malloc failed in strjoin()\n" );
X      exit( -1 );
X   }
X
X      /* join the strings together (same rule as the counting pass) */
X   p = result;
X   joined = 0;
X   for ( av = argv ; (a = *av) != NULL ; av++ ) {
X      if ( !*a )  continue;
X      if ( joined++ ) {
X         s = separator;
X         while ( (*p = *s++) )  ++p;   /* copy separator */
X      }/*end-if*/
X      while ( (*p = *a++) )  ++p;      /* copy token */
X   }/*end-for*/
X   *p = '\0';
X
X   return  result;
X}
END_OF_FILE
# NOTE: 6901 is the size of strsplit.c as originally posted; the text above
# has since been repaired, so a "wrong size" warning here is expected.
if test 6901 -ne `wc -c <'strsplit.c'`; then
    echo shar: \"'strsplit.c'\" unpacked with wrong size!
fi
# end of 'strsplit.c'
fi
echo shar: End of shell archive.
exit 0

______________________ "And miles to go before I sleep." ______________________
 Brad Appleton           brad@ssd.csd.harris.com   Harris Computer Systems
                             uunet!hcx1!brad           Fort Lauderdale, FL USA
~~~~~~~~~~~~~~~~~~~~ Disclaimer: I said it, not my company! ~~~~~~~~~~~~~~~~~~~