Path: utzoo!attcan!uunet!wyse!vsi1!ames!nrl-cmf!ukma!rutgers!bellcore!tness7!ninja!cpe!tif From: tif@cpe.UUCP Newsgroups: comp.sources.wanted Subject: Re: SOUNDEX algorithm Message-ID: <2300009@cpe> Date: 17 Sep 88 04:01:00 GMT References: <39768@pyramid.pyramid.com> Lines: 146 Nf-ID: #R:pyramid.pyramid.com:39768:cpe:2300009:000:3933 Nf-From: cpe.UUCP!tif Sep 16 23:01:00 1988 Written 5:52 pm Sep 15, 1988 by pyramid.UUCP!dhaile in cpe:comp.sources.w >Hi y'all! Had a need all of a sudden for a SOUNDEX or pseudo-SOUNDEX I'll post this since it's small and he didn't say "I don't usually read this group" :-) It comes from a bigger package which is a spelling aid (although the spelling aid could not meet my requirements). Note that SOUNDEX is a pretty crude algorithm, nothing like phonemes. (Somebody asked for that too, wish I had it to give.) Paul Chamberlain Computer Product Engineering, Tandy Corp. {convex,killer}!ninja!cpe!tif #! /bin/sh # This is a shell archive, meaning: # 1. Remove everything above the #! /bin/sh line. # 2. Save the resulting text in a file. # 3. Execute the file with /bin/sh (not csh) to create: # calcsoundex.c # This archive created: Fri Sep 16 22:54:34 1988 export PATH; PATH=/bin:/usr/bin:$PATH echo shar: "extracting 'calcsoundex.c'" '(2455 characters)' if test -f 'calcsoundex.c' then echo shar: "will not over-write existing file 'calcsoundex.c'" else sed 's/^ X//' << \SHAR_EOF > 'calcsoundex.c' X/* vi: set tabstop=4 : */ X X/* X * calcsoundex - calculate soundex codes X * X * Permission is given to copy or distribute this program provided you X * do not remove this header or make money off of the program. X * X * Please send comments and suggestions to: X * Barry Brachman X * Dept. of Computer Science X * Univ. of British Columbia X * Vancouver, B.C. V6T 1W5 X * X * .. {ihnp4!alberta, uw-beaver}!ubc-vision!ubc-cs!brachman X * brachman@cs.ubc.cdn X * brachman%ubc.csnet@csnet-relay.arpa X * brachman@ubc.csnet X */ X X#include X#include X X#include "sp.h" X Xchar word[MAXWORDLEN + 2]; X Xchar soundex_code_map[26] = { X/*** A B C D E F G H I J K L M N O P ***/ X 0, 1, 2, 3, 0, 1, 2, 0, 0, 2, 2, 4, 5, 5, 0, 1, X X/*** Q R S T U V W X Y Z ***/ X 2, 6, 2, 3, 0, 1, 0, 2, 0, 2 X}; X Xmain(argc, argv) Xint argc; Xchar **argv; X{ X register int c, i, soundex_length, digit_part, previous_code; X int ch, len, vflag; X short soundex; X char *gets(); X X vflag = 0; X if (argc > 2 || (argc == 2 && strcmp(argv[1], "-v"))) { X fprintf(stderr, "Usage: calcsoundex [-v]\n"); X exit(1); X } X if (argc > 1) X vflag = 1; X X while (fgets(word, sizeof(word), stdin) != (char *) NULL) { X len = strlen(word); X if (word[len - 1] != '\n') { X fprintf(stderr, "calcsoundex: Word too long: %s", word); X while ((ch = getchar()) != '\n') /* flush rest of line */ X putc(ch, stderr); X putc('\n', stderr); X continue; X } X word[--len] = '\0'; X if (len > MAXWORDLEN) { X fprintf(stderr, "calcsoundex: Word too long: %s\n", word); X continue; X } X X for (i = 0; word[i] != '\0'; i++) { X if (isupper(word[i])) X word[i] = tolower(word[i]); X } X if (!isalpha(word[0])) X continue; X X digit_part = 0; X soundex_length = 0; X previous_code = soundex_code_map[word[0] - 'a']; X for (i = 1; word[i] != '\0' && soundex_length < 3; i++) { X if (!isalpha(word[i])) X continue; X c = soundex_code_map[word[i] - 'a']; X if (c == 0 || previous_code == c) { X previous_code = c; X continue; X } X digit_part = digit_part * 10 + c; X previous_code = c; X soundex_length++; X } X while (soundex_length++ < 3) X digit_part *= 10; X soundex = digit_part << 5 + word[0] - 'a'; X printf("%c", word[0]); X if (digit_part < 100) X putchar('0'); X if (digit_part < 10) X putchar('0'); X if (digit_part == 0) X putchar('0'); X else X printf("%d", digit_part); X if (vflag) X printf(" %s", word); X putchar('\n'); X } X putchar('\n'); X exit(0); X} X SHAR_EOF if test 2455 -ne "`wc -c < 'calcsoundex.c'`" then echo shar: "error transmitting 'calcsoundex.c'" '(should have been 2455 characters)' fi fi exit 0 # End of shell archive