#!/bin/sh
#
# This is a shell archive.  To extract its contents,
# execute this file with /bin/sh to create the file(s):
#
#	README		soundex1.4gl	soundex2a.c	soundex4.c
#	mds.globals.h	soundex1.c	soundex2b.c	soundex5.c
#	soundex.ec	soundex2.4gl	soundex3.c	soundex6.c
#	soundex.msg
#
# This shell archive created: Sun Feb 11 16:40:48 EST 1996
#
# Layout: each member is a here-document whose lines carry a leading "X";
# unpack_file strips that prefix while writing the member to disk.  The
# quoted delimiter (<<\SHAR_EOF) keeps the shell from expanding anything
# inside the payload.
#
# NOTE(review): the copy this file was restored from had lost its line
# breaks and intra-line spacing, and several payload "#include" lines have
# no header name.  Line breaks were rebuilt from the "X" markers; spacing
# inside lines is a best guess.  The byte counts passed to unpack_file are
# the ones recorded in the archive, so a "Bad Copy" warning may only reflect
# the rebuilt spacing -- confirm against a pristine copy.

# unpack_file NAME BYTES
#   Reads an X-prefixed payload on stdin, writes it to NAME with the first
#   "X" of every line removed, then compares NAME's size with BYTES.
#   Prints "Extracting file NAME" first and, on a size mismatch,
#   "Lengths do not match -- Bad Copy of NAME".  Never aborts the script.
unpack_file() {
    echo "Extracting file $1"
    sed -e 's/^X//' > "$1"
    # $(( )) turns the (possibly blank-padded) wc output into a plain
    # integer; an unreadable file yields 0 and is reported as a bad copy.
    if [ "$(( $(wc -c < "$1") ))" -ne "$2" ]
    then
        echo "Lengths do not match -- Bad Copy of $1"
    fi
}

unpack_file README 1851 <<\SHAR_EOF
XREADME
X
XThis is a collection of files that contain various routines that produce or use
Xthe Soundex string matching code.  The files and their contents are:
X
X
X    Programs and functions written in C
X
X        soundex1.c      Program to display soundex code for one string.
X
X        soundex2a.c     Function written by Jonathan Leffler.  Has #define's to
X                        create a main() for testing.
X
X        soundex2b.c     A more recent version of the function in soundex2a.c.
X
X        soundex3.c      Program with a copy of the function in soundex2a.c that
X                        displays soundex matches of a word from a list of words.
X
X        soundex4.c      Function callable from Informix-4GL.  Doesn't appear to
X                        zero-pad the last soundex character correctly.
X
X        soundex5.c      Another function callable from C.
X
X        soundex6.c      Function with #define's that control whether standard
X                        soundex code or "airline" soundex code is generated
X
X        soundex.msg     Function posted to informix-list/comp.datbases.informix
X
X
X    Function writen in ESQL/C
X
X        soundex.ec      Function to return soundex code
X
X        mds.globals.h   Header file for soundex.ec
X
X
X    Functions written in Informix-4GL
X
X        soundex1.4gl    Function to return soundex code
X
X        soundex2.4gl    Function to return soundex code
X
X
XThe following people contributed either directly or by referral to these
Xfiles:
X
X    David I. Berg
X    Neil Briscoe
X    David Cortesi
X    Luis P. Caamano
X    John Gorman
X    Walt Hultgren
X    Jonathan Leffler
X    Poul Pedersen
X    Don Simon
X    Naomi Walker
SHAR_EOF

unpack_file mds.globals.h 644 <<\SHAR_EOF
X/*
X * mds.globals.h   Header file containing structures & definitions
X *                 needed by ALL of the subroutines for the mds database.
X */
X
X/*
X*
X*   Set Xenix to 1 so SCO xenix precompiler picksup Xenix specific
X*   routines not Sun OS
X*
X*/
X
X
X
X#include
X#include
X#include
X
X#define FALSE 0
X#define TRUE !FALSE
X/* #define XENIX 1 */
X
X#ifdef XENIX
X#include
X#else
X#include
X#endif
X
Xextern int errno;
X
X/*
X * Subroutine Declarations.
X */
X
Xchar *mktemp(), *strclip(), *strfromdate(), *index(), *getenv(), *malloc();
Xchar *strchr();
Xint strncmp(), int_search(), strlen();
SHAR_EOF

unpack_file soundex.ec 3234 <<\SHAR_EOF
X/* I/O Routines for the reporting functions of the mds 4gl database */
X
X#include "mds.globals.h"
X
X$include sqltypes;
X
X#define SURNAMELEN 21
X#define SOUNDEXLEN 5
X#define UPSHIFT ('a' - 'A')
X
X/*
X * Args : 1st off - string to be `soundex'ed (usually a surname)
X */
X
Xmake_soundex (nargs)
Xint nargs;
X{
X    char inputstr[SURNAMELEN];
X    char workstr[SURNAMELEN];
X    char longsoundex[SURNAMELEN];
X    char outputstr[SOUNDEXLEN];
X    char nullstr[SOUNDEXLEN];
X    char *inptr, *workptr, *outworkptr, *longsoundptr, *nodupsptr, *outptr;
X    char ch, oldch;
X
X    rsetnull (CCHARTYPE, nullstr);
X
X    if (nargs != 1) {
X
X        /*
X         * Close the database safely
X         */
X
X        retquote (nullstr);
X        return (1);
X    }
X
X    /*
X     * Pop input string
X     */
X
X    popquote (inputstr, SURNAMELEN);
X
X    if ((risnull (inputstr)) || (*inputstr == '\0')) {
X        retquote (nullstr);
X        return (1);
X    }
X    inptr = inputstr;
X    workptr = workstr;
X
X    /*
X     * Remove ALL non alphabetic characters and force to uppercase
X     */
X
X    for (; ((ch = *inptr) != '\0'); inptr++)
X        if ((ch >= 'A') && (ch <= 'Z'))
X            *workptr++ = ch;
X        else if ((ch >= 'a') && (ch <= 'z'))
X            *workptr++ = (ch - UPSHIFT);
X
X    *workptr = '\0';
X
X    /*
X     * Remove any duplicates at the beginning of the string
X     */
X
X    for (outworkptr = workptr = workstr, oldch = '\0'; ((ch = *workptr) != '\0'); workptr++) {
X        if (ch != oldch)
X            *outworkptr++ = ch;
X        oldch = ch;
X    }
X
X    *outworkptr = '\0';
X
X    /*
X     * Test whether soundex string has any alphabetic characters in it
X     */
X
X    if (*workstr == '\0') {
X        retquote (nullstr);
X        return (1);
X    }
X    for (workptr = (workstr + 1), longsoundptr = longsoundex; ((ch = *workptr) != '\0'); workptr++)
X        switch (ch) {
X        case 'B':
X        case 'F':
X        case 'P':
X        case 'V':
X            *longsoundptr++ = '1';
X            break;
X
X        case 'C':
X        case 'G':
X        case 'J':
X        case 'K':
X        case 'Q':
X        case 'S':
X        case 'X':
X        case 'Z':
X            *longsoundptr++ = '2';
X            break;
X
X        case 'D':
X        case 'T':
X            *longsoundptr++ = '3';
X            break;
X
X        case 'L':
X            *longsoundptr++ = '4';
X            break;
X
X        case 'M':
X        case 'N':
X            *longsoundptr++ = '5';
X            break;
X
X        case 'R':
X            *longsoundptr++ = '6';
X            break;
X        }
X
X    *longsoundptr = '\0';
X
X    /*
X     * Remove any duplicates. eg. "11234453" --> "123453"
X     */
X
X    for (longsoundptr = nodupsptr = longsoundex, oldch = '0'; ((ch = *longsoundptr) != '\0'); longsoundptr++) {
X        if (ch != oldch)
X            *nodupsptr++ = ch;
X        oldch = ch;
X    }
X
X    *nodupsptr = '\0';
X
X    /*
X     * Copy 1st character from upshifted original and then upto 3 digits from the longsoundex
X     */
X
X    outputstr[0] = *workstr;
X
X    for (outptr = (outputstr + 1), longsoundptr = longsoundex; (((ch = *longsoundptr) != '\0') && (longsoundptr <= (longsoundex +
X        3))); longsoundptr++)
X        *outptr++ = ch;
X
X    *outptr = '\0';
X
X
X    retquote (outputstr);
X    return (1);
X}
X
X
SHAR_EOF

unpack_file soundex.msg 2640 <<\SHAR_EOF
XFrom: pp@q8.dk (Poul Pedersen)
XSubject: Re: Help with Soundex code..
XDate: 17 Jan 1996 10:08:07 -0500
X
XHelp with Soundex code..                                  17-Jan-1996
X
XHi friend, I found this one, hope you can use it:
X
X/***************************************************************************
X** soundex      Transform the argument into a code that tends to bring
X**              together all variants of the same name.
X**
X**              soundex("Poul") = P400
X**              soundex("Paul") = P400
X**              soundex("Peter") = P360
X**              soundex("Peder") = P360
X**
X**              The method was originally developed by Margaret K. Odell
X**              and Robert C. Russell.
X**
X**              Retain the first letter of the name, and drop all
X**              occurrences of a, e, h, i, o, u, w and y in other positions.
X**
X**              Assign the following numbers to the remaining letters after
X**              the first:
X**
X**                b,f,p,v -> 1                  l -> 4
X**                c,g,j,k,q,s,x,z -> 2          m,n -> 5
X**                d,t -> 3                      r -> 6
X**
X**              If two or more letters with the same code were adjacent in
X**              the original name, omit all but the first.
X**
X**              Convert to the form "letter, digit, digit, digit" by adding
X**              trailingg zeros (if there are less than three digits),
X**              or by droppingg rightmost digits (if there are more than
X**              three).
X**
X***************************************************************************/
X#include
X#define SOUNDEXLEN 4
Xchar *soundex(original_name)
Xchar *original_name;
X {
X    static char mapstring[] = "01230120022455012623010202";
X    static char strbuffer[SOUNDEXLEN+1];
X    register int index;
X    register char mapch, upperch, pch = '0';
X
X    strcpy(strbuffer,"Z000");
X    for (index = 0; *original_name && index < SOUNDEXLEN; original_name++)
X        if (isalpha(*original_name)) {
X            upperch = toupper(*original_name);
X            mapch = mapstring[upperch-'A'];
X            if (index == 0 || (mapch != '0' && mapch != pch)) {
X                strbuffer[index] = index ? mapch : upperch;
X                index++;
X            }
X            pch = mapch;
X        }
X    return(strbuffer);
X}
X
XRegards
X-----------------------------------------------------------------
XDatabaseadministrator                Poul Pedersen
XKuwait Petroleum (Danmark) A/S
XHummeltoftevej 49                    Mail: pp@q8.dk
X2830 Virum                           Voice: +45 45 98 45 94
XDenmark                              Fax: +45 45 83 16 95
X-----------------------------------------------------------------
X
X>Hi friends,
X>
X>I am looking for the Informix code written for Soundex example.
X>I know it exists, b'caz someone saw it 3 yrs back on internet.
X>can someone help me with this.
X>
X>thanks...
X>
X>
X>--
X>dharmesh
X>--
X>When all other means of communication fail, Try Silence.
X>==============================================+==============
X
SHAR_EOF

unpack_file soundex1.4gl 3168 <<\SHAR_EOF
X{  soundex.4gl     Calculate Soundex Code  }
X
X
X{
X    Summary:       Calculate the 4-character Soundex code for a supplied string
X
X    Environment:   SunOS 4.1, Informix-4GL
X
X    Submitted by:  Walt Hultgren
X
X
X    This function will return the 4-character Soundex code for the string
X    supplied by the calling routine.  The "char(100)" definition of "str"
X    should be tailored to fit your needs.
X
X}
X
X
Xfunction soundex ( str )
X
X
X    define  str       char(100),  { supplied string }
X            str_leng  smallint,   { length of string }
X            i_str     smallint,   { pointer to string characters }
X
X            s_code    char(4),    { soundex code }
X            i_code    smallint,   { pointer to characters of soundex code }
X
X            char3     char(3),
X            char1     char(1),
X            p_char1   char(1)
X
X
X    {
X        Initilize soundex code and check length of supplied string
X    }
X
X    let s_code = "0000"
X
X    if ( str is NULL ) then let str_leng = 0
X    else let str_leng = length ( str )
X    end if
X
X
X    {
X        Calculate soundex code if string is non-NULL
X    }
X
X    if ( str_leng > 0 )
X    then
X
X        {
X            Step through letters in string
X        }
X
X        let i_code = 0
X
X        for i_str = 1 to str_leng
X
X            let char1 = upshift ( str [ i_str, i_str ] )
X
X            if ( char1 < "A" or char1 > "Z" )
X            then
X                continue for
X            end if
X
X
X            {
X                If first letter, start soundex code with it
X            }
X
X            if ( i_code = 0 )
X            then
X                let s_code[1,1] = char1
X                let i_code = 1
X                let p_char1 = "0"
X
X                continue for
X            end if
X
X
X            {
X                Get group code for this letter
X            }
X
X            let char3 = "*", char1, "*"
X
X            case
X                when ( "BFPV" matches char3 ) let char1 = "1" exit case
X                when ( "CGJKSXZ" matches char3 ) let char1 = "2" exit case
X                when ( "DT" matches char3 ) let char1 = "3" exit case
X                when ( "L" matches char3 ) let char1 = "4" exit case
X                when ( "MN" matches char3 ) let char1 = "5" exit case
X                when ( "R" matches char3 ) let char1 = "6" exit case
X                otherwise let char1 = "0" exit case
X            end case
X
X
X            {
X                If group code is non-zero and not the same as the previous code,
X                append it to soundex code.  Stop after 4 soundex characters.
X            }
X
X            if ( char1 <> p_char1 )
X            then
X
X                if ( char1 <> "0" )
X                then
X
X                    let i_code = i_code + 1
X                    let s_code[i_code,i_code] = char1
X
X                    if ( i_code = 4 )
X                    then
X                        exit for
X                    end if
X
X                end if
X
X                let p_char1 = char1
X
X            end if
X
X        end for
X
X    end if
X
X
X    {
X        Return soundex code to calling routine
X    }
X
X    return s_code
X
X
Xend function
SHAR_EOF

unpack_file soundex1.c 2918 <<\SHAR_EOF
X/***
X*               SOUNDEX ALGORITHM in C                          *
X*                                                               *
X*       The basic Algorithm source is taken from EDN Nov.       *
X*       14, 1985 pg. 36.                                        *
X*                                                               *
X*       As a test Those in Illinois will find that the          *
X*       first group of numbers in their drivers license         *
X*       number is the soundex number for their last name.       *
X*                                                               *
X*       RHW PC-IBBS ID. #1230                                   *
X*                                                               *
X****************************************************************/
X
X#include
X
Xchar (*soundex(out_pntr, in_pntr))
Xchar *in_pntr;
Xchar *out_pntr;
X{
Xextern char get_scode();
Xchar ch,last_ch;
Xint count = 0;
X
X    strcpy(out_pntr,"0000");        /* Pre-fill output string for */
X                                    /* error and trailing zeros. */
X    *out_pntr = toupper(*in_pntr);  /* Copy first letter */
X    last_ch = get_scode(*in_pntr);  /* code of the first letter */
X                                    /* for the first 'double-letter */
X                                    /* check. */
X                                    /* Loop on input letters until */
X                                    /* end of input (null) or output */
X                                    /* letter code count = 3 */
X
X    while( (ch = get_scode(*(++in_pntr)) ) && (count < 3) )
X    {
X        if( (ch != '0') && (ch != last_ch) ) /* if not skipped or double */
X            *(out_pntr+(++count)) = ch; /* letter, copy to output */
X        last_ch = ch; /* save code of last input letter for */
X                      /* next double-letter check */
X    }
X    return(out_pntr); /* pointer to input string */
X}
X
Xchar get_scode(ch)
Xchar ch;
X{
X                            /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
X                            /* :::::::::::::::::::::::::: */
Xstatic char soundex_map[] = "01230120022455012623010202";
X
X    /* If alpha, map input letter to soundex code. If not, return 0 */
X
X    if( !isalpha(ch) ) /*error if not alpha */
X        return(0);
X    else
X        return(soundex_map[(toupper(ch) - 'A')] );
X}
X
Xmain(argc, argv)
Xint argc;
Xchar *argv[];
X{
Xchar *code[10];
X
Xint i;
X
X    if(argc == 1) /* No arguments, give usage */
X    {
X        printf("\nUsage: soundex (name) (...)\n");
X        exit(1);
X    }
X
X
X    for(i = 1; i < argc; i++)
X    {
X        soundex(code, argv[i]) ;
X
X        printf("The Soundex Code for \"%s\" is: %s\n", argv[i],code);
X    }
X
X    exit(0);
X}
SHAR_EOF

unpack_file soundex2.4gl 4336 <<\SHAR_EOF
XFrom: infmx!jupiter!dsimon@uunet.UU.NET (Don Simon)
XDate: Mon, 21 Dec 92 12:23:55 CST
XSubject: Re: Fuzzy Text search routine for 4GL ??
X
XHere is the requested code.
X
Xdon simon
X
X                TechInfo # 3653
X
XShort Description:
XSoundex Sample Code
X
XLong Description:
XFrom: cortesi@godzilla (David Cortesi)
X
X{ ====================================================================
X    Someone (sorry, forget who) asked about the Soundex algorithm.
X    Here it is, from Knuth's Art of Computer Programming, Vol 3,
X    Searching and Sorting (p 391-2).  quote:
X
X    The following `soundex' method, which was originally developed
X    by Margaret K. Odell and Robert C. Russell [cf. US Patents 1261167
X    (1918), 1435663 (1922)], has often been used for encoding surnames:
X
X    1. Retain the first letter of the name, and drop all occurrences
X       of a,e, h, i, o, u, w, y in other positions.
X
X    2. Assign the following numbers to the remaining letters after
X       the first:
X
X            b,f,p,v --> 1
X            c,g,j,k,q,s,x,z --> 2
X            d,t --> 3
X            l --> 4
X            m, n --> 5
X            r --> 6
X
X    3. If two or more letters with the same code were adjacent IN THE
X       ORIGINAL NAME (before step 1) [emphasis added - dec], omit all but
X       the first.
X
X    4. Convert to the form letter, digit, digit, digit by adding trailing
X       zeros or by dropping rightmost digits.
X
X    end quote.
X
X    Knuth goes on to note that Soundex brings some nonsimilar names
X    together, e.g. Knuth and Kant, and separates some similar ones
X    e.g. Rogers, Rodgers.  "But by and large the Soundex code greatly
X    increases the chance of finding a name in one of its disguises."
X
X    In implementing the above spec I realized it is ambiguous on the
X    question of how to handle names that start with a doubled letter
X    such as Lloyd.  However from the examples Knuth gives you can deduce
X    that they are to be compressed out (Lloyd --> L400, not L430).
X    There seems to be an assumption of dealing with only alphabetics.
X    Presumably nonalphas should be discarded.  There is no indication
X    of how to handle mixed letter case.  The following function forces
X    its input to all upper case immediately.  A mixed-case function
X    is more complicated.
X
X    The following 4GL function implements Soundex.  The sequence
X    of operations is not quite the same as Knuth gives.  In order to
X    make it easier to detect digits that *were* adjacent before vowels
X    are removed, it defers removal of the vowels until the final step.
X
X    The function takes a string of up to 64 characters.  It returns a
X    string of exactly 4 characters, the Soundex code of the input.
X
X============================================================================ }
X
XMAIN
XDEFINE tst,ans CHAR(64)
XWHILE TRUE
X    PROMPT "test string: " FOR ans
X    LET tst = soundex(ans)
X    DISPLAY "<" , tst CLIPPED , ">"
XEND WHILE
XEND MAIN
X
XFUNCTION soundex( inp )
XDEFINE
X    inp CHAR(64),
X    wrk CHAR(65),
X    len, j, k SMALLINT
X
X    LET len = LENGTH(inp)
X    IF len == 0 THEN -- all blank or null
X        RETURN " 000"
X    END IF
X
X    LET inp = UPSHIFT(inp)
X
X    IF inp[2] == inp[1] THEN -- it's Lloyd! Hiya, Lloyd!
X        LET inp = inp[2,len]
X        LET len = len - 1
X    END IF
X
X    LET wrk = inp[1,1] -- initial letter, blank padded to 64 chars
X    FOR j = 2 TO len
X        CASE
X        WHEN inp[j] MATCHES "[BFPV]"
X            LET wrk[j] = "1"
X        WHEN inp[j] MATCHES "[CGJKQSXZ]"
X            LET wrk[j] = "2"
X        WHEN inp[j] MATCHES "[DT]"
X            LET wrk[j] = "3"
X        WHEN inp[j] == "L"
X            LET wrk[j] = "4"
X        WHEN inp[j] MATCHES "[MN]"
X            LET wrk[j] = "5"
X        WHEN inp[j] == "R"
X            LET wrk[j] = "6"
X        OTHERWISE
X            LET wrk[j] = "0"
X        END CASE
X    END FOR
X
X    LET j = 2 -- wrk[j] is next char to inspect
X    LET k = 1 -- wrk[k] is last char to keep
X    LET wrk[len + 1,len + 1] = "9" -- sentinel to stop scan
X    WHILE wrk[j] <> "9" -- scan off 0s and duplicate non-0s
X        IF wrk[j] == "0" THEN
X            LET j = j + 1
X        ELSE
X            LET k = k + 1
X            LET wrk[k] = wrk[j]
X
X            WHILE wrk[k] == wrk[j]
X                LET j = j + 1
X            END WHILE
X        END IF
X    END WHILE
X
X    IF k < 4 THEN
X        LET wrk[k+1,k+3] = "000"
X    END IF
X
X    RETURN wrk[1,4]
XEND FUNCTION
SHAR_EOF

unpack_file soundex2a.c 2839 <<\SHAR_EOF
X/*
X** SOUNDEX CODING
X**
X** Rules:
X** 1. Retain the first letter; ignore non-alphabetic characters.
X** 2. Replace second and subsequent characters by a group code.
X**      Group   Letters
X**      1       BFPV
X**      2       CGJKSXZ
X**      3       DT
X**      4       L
X**      5       MN
X**      6       R
X** 3. Do not repeat digits
X** 4. Truncate or ser-pad to 4-character result.
X**
X** Originally formatted with tabstops set at 4 spaces -- you were
Xwarned!
X**
X** Code by: Jonathan Leffler (john@sphinx.co.uk)
X** This code is shareware -- I wrote it; you can have it for free
X** if you supply it to anyone else who wants it for free.
X**
X** BUGS: Assumes ASCII
X*/
X
X#include
Xstatic char lookup[] = {
X    '0', /* A */
X    '1', /* B */
X    '2', /* C */
X    '3', /* D */
X    '0', /* E */
X    '1', /* F */
X    '2', /* G */
X    '0', /* H */
X    '0', /* I */
X    '2', /* J */
X    '2', /* K */
X    '4', /* L */
X    '5', /* M */
X    '5', /* N */
X    '0', /* O */
X    '1', /* P */
X    '0', /* Q */
X    '6', /* R */
X    '2', /* S */
X    '3', /* T */
X    '0', /* U */
X    '1', /* V */
X    '0', /* W */
X    '2', /* X */
X    '0', /* Y */
X    '2', /* Z */
X};
X
X/*
X** Soundex for arbitrary number of characters of information
X*/
Xchar *nsoundex(str, n)
Xchar *str; /* In: String to be converted */
Xint n; /* In: Number of characters in result string
X*/
X{
X    static char buff[10];
X    register char *s;
X    register char *t;
X    char c;
X    char l;
X
X    if (n <= 0)
X        n = 4; /* Default */
X    if (n > sizeof(buff) - 1)
X        n = sizeof(buff) - 1;
X    t = &buff[0];
X
X    for (s = str; ((c = *s) != '\0') && t < &buff[n]; s++)
X    {
X        if (!isascii(c))
X            continue;
X        if (!isalpha(c))
X            continue;
X        c = toupper(c);
X        if (t == &buff[0])
X        {
X            l = *t++ = c;
X            continue;
X        }
X        c = lookup[c-'A'];
X        if (c != '0' && c != l)
X            l = *t++ = c;
X    }
X    while (t < &buff[n])
X        *t++ = '0';
X    *t = '\0';
X    return(&buff[0]);
X}
X
X/* Normal external interface */
Xchar *soundex(str)
Xchar *str;
X{
X    return(nsoundex(str, 9));
X}
X
X/*
X** Alternative interface:
X** void soundex(given, gets)
X** char *given;
X** char *gets;
X** {
X**     strcpy(gets, nsoundex(given, 4));
X** }
X*/
X
X
X#ifdef TEST
X#include
Xmain()
X{
X    char buff[30];
X
X    while (fgets(buff, sizeof(buff), stdin) != (char *)0)
X        printf("Given: %s Soundex produces %s\n", buff,
X            soundex(buff));
X}
X#endif
SHAR_EOF

unpack_file soundex2b.c 3032 <<\SHAR_EOF
X/*
X@(#)File: soundex.c
X@(#)Version: 1.2
X@(#)Last changed: 89/12/18
X@(#)Purpose: Produce SOUNDEX code for string
X@(#)Author: Jonathan Leffler (john@sphinx.co.uk)
X*/
X
X/*
X** SOUNDEX CODING
X**
X** Rules:
X** 1. Retain the first letter; ignore non-alphabetic characters.
X** 2. Replace second and subsequent characters by a group code.
X**      Group   Letters
X**      1       BFPV
X**      2       CGJKSXZ
X**      3       DT
X**      4       L
X**      5       MN
X**      6       R
X** 3. Do not repeat digits if they come from adjacent characters.
X**    (Corrected by: Raymond Chen )
X** 4. Truncate or zero-pad to 4-character result.
X**
X** Originally formatted with tabstops set at 4 spaces -- you were warned!
X**
X** This code is shareware -- I wrote it; you can have it for free
X** if you supply it to anyone else who wants it for free.
X**
X** BUGS: Assumes ASCII
X*/
X
X#include
X
X#ifndef lint
Xstatic char sccs[] = "@(#)soundex.c 1.2 89/12/18";
X#endif
X
Xstatic char lookup[] = {
X    '0', /* A */
X    '1', /* B */
X    '2', /* C */
X    '3', /* D */
X    '0', /* E */
X    '1', /* F */
X    '2', /* G */
X    '0', /* H */
X    '0', /* I */
X    '2', /* J */
X    '2', /* K */
X    '4', /* L */
X    '5', /* M */
X    '5', /* N */
X    '0', /* O */
X    '1', /* P */
X    '0', /* Q */
X    '6', /* R */
X    '2', /* S */
X    '3', /* T */
X    '0', /* U */
X    '1', /* V */
X    '0', /* W */
X    '2', /* X */
X    '0', /* Y */
X    '2', /* Z */
X};
X
X/*
X** Soundex for arbitrary number of characters of information
X*/
Xchar *nsoundex(str, n)
Xchar *str; /* In: String to be converted */
Xint n; /* In: Number of characters in result string */
X{
X    static char buff[10];
X    register char *s;
X    register char *t;
X    char c;
X    char l;
X
X    if (n <= 0)
X        n = 4; /* Default */
X    if (n > sizeof(buff) - 1)
X        n = sizeof(buff) - 1;
X    t = &buff[0];
X
X    for (s = str; ((c = *s) != '\0') && t < &buff[n]; s++)
X    {
X        if (!isascii(c) || !isalpha(c))
X            continue;
X        c = toupper(c);
X        if (t == &buff[0])
X        {
X            l = *t++ = c;
X            continue;
X        }
X        c = lookup[c-'A']; /* Assumes ASCII */
X        if (c != '0' && c != l)
X            *t++ = c;
X        l = c;
X    }
X    while (t < &buff[n])
X        *t++ = '0';
X    *t = '\0';
X    return(&buff[0]);
X}
X
X/* Normal external interface */
Xchar *soundex(str)
Xchar *str;
X{
X    return(nsoundex(str, 4));
X}
X
X/*
X** Alternative interface:
X** void soundex(given, gets)
X** char *given;
X** char *gets;
X** {
X**     strcpy(gets, nsoundex(given, 4));
X** }
X*/
X
X
X#ifdef TEST
X#include
Xmain()
X{
X    char buff[30];
X
X    printf("String? ");
X    while (fgets(buff, sizeof(buff), stdin) != (char *)0)
X    {
X        printf("String : %sSoundex: %s\n", buff, soundex(buff));
X        printf("String? ");
X    }
X    putchar('\n');
X}
X#endif
SHAR_EOF

unpack_file soundex3.c 4996 <<\SHAR_EOF
X#include
X#include
X
X#define TRUE 1
X#define FALSE 0
X
X#define DEFAULT_DICT "/usr/dict/words"
X#define PATTERN_SIZE 6
X
X#define my_toupper(x) (islower(x) ? toupper(x) : (x))
X
Xstatic char lookup[] = {
X    '0', /* A */
X    '1', /* B */
X    '2', /* C */
X    '3', /* D */
X    '0', /* E */
X    '1', /* F */
X    '2', /* G */
X    '0', /* H */
X    '0', /* I */
X    '2', /* J */
X    '2', /* K */
X    '4', /* L */
X    '5', /* M */
X    '5', /* N */
X    '0', /* O */
X    '1', /* P */
X    '0', /* Q */
X    '6', /* R */
X    '2', /* S */
X    '3', /* T */
X    '0', /* U */
X    '1', /* V */
X    '0', /* W */
X    '2', /* X */
X    '0', /* Y */
X    '2', /* Z */
X};
X
Xchar *soundex();
X
X
Xmain (argc, argv)
Xint argc;
Xchar *argv[];
X{
X    int count;
X    char pattern[PATTERN_SIZE];
X    FILE *fp, *fopen();
X
X    if (argc < 2)
X    {
X        fprintf (stderr, "Usage: %s word [ - | wordlist ...]\n",
X            argv[0]);
X        fprintf (stderr, " use \"-\" to read from stdin.\n");
X        exit (1);
X    }
X
X    strcpy (pattern, soundex(argv[1]));
X
X    if (argc == 2) /* use default dictionary */
X    {
X        if ((fp = fopen (DEFAULT_DICT, "r")) == NULL)
X        {
X            fprintf (stderr, "%s: Cannot open %s for reading\n",
X                argv[0], DEFAULT_DICT);
X        }
X        else
X        {
X            match (fp, pattern);
X            fclose(fp);
X        }
X    }
X    else /* use specified dictionaries */
X    {
X        for (count = 2 ; count < argc ; count++)
X        {
X            if (strcmp (argv[count], "-") == 0)
X            {
X                match (stdin, pattern);
X            }
X            else if ((fp = fopen (argv[count], "r")) == NULL)
X            {
X                fprintf (stderr, "%s: Cannot open %s for reading\n",
X                    argv[0], argv[count]);
X            }
X            else
X            {
X                match (fp, pattern);
X                fclose(fp);
X            }
X        }
X    }
X    exit (0);
X}
X
X/************************************************************************
X/*
X/*
X/************************************************************************/
X
Xint match (fp, pattern)
Xregister FILE *fp;
Xregister char *pattern;
X{
X    char word[BUFSIZ];
X    register char *wordp = &word[0];
X
X    /* read all words before our stuff */
X    while (fgets(wordp, BUFSIZ - 1,fp) != NULL && *pattern != my_toupper
X        (*wordp))
X        ;
X
X    if (wordp == NULL)
X        return (FALSE);
X
X    word[strlen(word) - 1] = '\0'; /* remove the \n */
X    if (the_same (pattern, soundex(wordp)))
X    {
X        puts (word);
X    }
X
X    while (fgets(wordp, BUFSIZ - 1,fp) != NULL)
X    {
X        if (*pattern != my_toupper (*wordp)) /* give it up */
X        {
X            break;
X        }
X        word[strlen(word) - 1] = '\0'; /* remove the \n */
X        if (the_same (pattern, soundex(wordp)))
X        {
X            puts (word);
X        }
X    }
X    return (TRUE);
X}
X
X
X
X/************************************************************************
X/*
X/*
X/************************************************************************/
X
Xint the_same (str1, str2)
Xregister char *str1;
Xregister char *str2;
X{
X    while (*(str1++) == *(str2++))
X        if (*str1 == '\0')
X            return (TRUE);
X    return (FALSE);
X}
X
X
X/*
X** SOUNDEX CODING
X**
X** Rules:
X** 1. Retain the first letter; ignore non-alphabetic characters.
X** 2. Replace second and subsequent characters by a group code.
X**      Group   Letters
X**      1       BFPV
X**      2       CGJKSXZ
X**      3       DT
X**      4       L
X**      5       MN
X**      6       R
X** 3. Do not repeat digits
X** 4. Truncate or ser-pad to 4-character result.
X**
X** Originally formatted with tabstops set at 4 spaces -- you were
Xwarned!
X**
X** Code by: Jonathan Leffler (john@sphinx.co.uk)
X** This code is shareware -- I wrote it; you can have it for free
X** if you supply it to anyone else who wants it for free.
X**
X** BUGS: Assumes ASCII
X*/
X
X/*
X** Soundex for arbitrary number of characters of information
X*/
Xchar *soundex(str)
Xchar *str; /* In: String to be converted */
X{
X    static char buff[PATTERN_SIZE +1];
X    register char *s;
X    register char *t;
X    char c;
X    char l;
X
X    t = &buff[0];
X
X    for (s = str; ((c = *s) != '\0') && t < &buff[PATTERN_SIZE]; s++)
X    {
X        if (!isascii(c))
X            continue;
X        if (!isalpha(c))
X            continue;
X        if (islower(c))
X            c = toupper(c);
X        if (t == &buff[0])
X        {
X            l = *t++ = c;
X            continue;
X        }
X        c = lookup[c-'A'];
X        if (c != '0' && c != l)
X            l = *t++ = c;
X    }
X    while (t < &buff[PATTERN_SIZE])
X        *t++ = '0';
X    *t = '\0';
X    return(&buff[0]);
X}
SHAR_EOF

unpack_file soundex4.c 1980 <<\SHAR_EOF
X/***
X*
X* soundex()
X* adopted for 4gl
X*
X**/
X#include "stdio.h"
X#include "ctype.h"
X#define MAX_DIGITS 4 /** number of digits in code sequence **/
X
Xstatic char omit_letter[] ={"AEHIOUWY"};
Xstatic char *code_group[] =
X{
X    "BFPV", "CGJKQSXZ", "DT", "L", "MN", "R", NULL
X};
X/*
XIf one wants to test this stand alone than remove the comments
X
X#define popquote(a,b) strcpy(a,str)
X#define pushquote(a) strcpy(rstr,a)
Xchar str[100];
Xchar rstr[100];
Xmain()
X{
X    while(gets(str))
X    {
X        if(strcmp (str,"end") == 0) break;
X        soundex(1);
X        *str = 0;
X        printf("code = %s for name %s\n",rstr,str);
X    }
X}
X*/
Xsoundex(args)
Xint args;
X{
X    char name_for_soundex[100];
X    char soundex[MAX_DIGITS];
X    char *p;
X    int k,i,j;
X
X    popquote(name_for_soundex,sizeof(name_for_soundex));
X
X    /**
X    first character not translated. But shouldn't we make sure
X    it is an alpha? In case a funny user enters " Smith";
X    **/
X    for(i = j = 0; !isalpha(name_for_soundex[i]);i++)
X        ;
X
X    soundex[j++] = toupper(name_for_soundex[i]);
X
X    for (i++;j < MAX_DIGITS && name_for_soundex[i];i++)
X    {
X        /**
X        Only alpha's are allowed why this should return an error
X        if the letter isn't an alpha I don't know.
X        it would prohibit names like D'artagne. Who was
X        one of the three musketeers
X        **/
X        if (isalpha(name_for_soundex[i]))
X        {
X            name_for_soundex[i] = toupper(name_for_soundex[i]);
X            if (strchr(omit_letter,name_for_soundex[i]) == NULL)
X            {
X                for(k = 0; code_group[k];k++)
X                {
X                    if(strchr(code_group[k],name_for_soundex[i]))
X                        soundex[j++] = (k + 1) + 48;
X                }
X            }
X        }
X    }
X
X    /** fill string if neccesary **/
X    while (j < MAX_DIGITS)
X    {
X        soundex[j++] = '0';
X    }
X    soundex[j] = '\0';
X    pushquote(soundex);
X    return(args);
X}
SHAR_EOF

unpack_file soundex5.c 1928 <<\SHAR_EOF
X/*
X * Reference: Adapted from Knuth, D.E. (1973) The art of computer programming;
X * Volume 3: Sorting and searching. Addison-Wesley Publishing Company:
X * Reading, Mass. Page 392.
X *
X * 1. Retain the first letter of the name, and drop all occurrences of
X *    a, e, h, i, o, u, w, y in other positions.
X *
X * 2. Assign the following numbers to the remaining letters after the first:
X *      b, f, p, v -> 1                 l -> 4
X *      c, g, j, k, q, s, x, z -> 2     m, n -> 5
X *      d, t -> 3                       r -> 6
X *
X * 3. If two or more letters with the same code were adjacent in the original
X *    name (before step 1), omit all but the first.
X *
X * 4. Convert to the form ``letter, digit, digit, digit'' by adding trailing
X *    zeros (if there are less than three digits), or by dropping rightmost
X *    digits (if there are more than three).
X *
X * The examples given in the book are:
X *
X *      Euler, Ellery           E460
X *      Gauss, Ghosh            G200
X *      Hilbert, Heilbronn      H416
X *      Knuth, Kant             K530
X *      Lloyd, Ladd             L300
X *      Lukasiewicz, Lissajous  L222
X *
X * Most algorithms fail in two ways:
X * 1. they omit adjacent letters with the same code AFTER step 1, not before.
X * 2. they do not omit adjacent letters with the same code at the beginning
X *    of the name.
X *
X */
X
X#include
X#include
X
X#define SDXLEN 4
X
Xchar *soundex(name)
Xchar *name;
X{
Xstatic char buf[SDXLEN+1];
Xstatic char map[] = "01230120022455012623010202";
Xregister char mc, uc, pc = '0';
Xregister int idx;
X
Xstrcpy(buf,"Z000");
X
Xfor (idx = 0; *name && idx < SDXLEN; name++)
X    if (isalpha(*name)) {
X        uc = toupper(*name);
X        mc = map[uc-'A'];
X        if (idx == 0 || (mc != '0' && mc != pc)) {
X            buf[idx] = idx ? mc : uc;
X            idx++;
X        }
X        pc = mc;
X    }
Xreturn(buf);
X}
SHAR_EOF

unpack_file soundex6.c 4385 <<\SHAR_EOF
XFrom: anasaz!qip.naomi@enuucp.eas.asu.edu (Naomi Walker)
XSubject: Re: Fuzzy Text search routine for 4GL ??
XDate: Mon, 7 Dec 92 10:23:47 MST
X
X/*TITLE r_soundex.c - Calculate Soundex Code for a String - 1.3.1.1 */
X/*
X** (]$[) r_soundex.c:1.3.1.1 | CDATE= 6/26/90 13:19:25
X**
X** Copyright 1990, Anasazi, Inc.
X** All Rights Reserved
X*/
X
X/*
X** Two types of soundex coding exist, default and airline.
X**
X** The following are the rules for default coding:
X** DEFAULT SOUNDEX CODING
X**
X** Rules:
X** 1. Retain the first letter; ignore non-alphabetic characters.
X** 2. Replace second and subsequent characters by a group code.
X**      Group   Letters
X**      1       BFPV
X**      2       CGJKSXZ
X**      3       DT
X**      4       L
X**      5       MN
X**      6       R
X** 3. Do not repeat digits
X** 4. Truncate or ser-pad to 4-character result.
X**
X** Code by: Jonathan Leffler (john@sphinx.co.uk)
X** This code is shareware -- I wrote it; you can have it for free
X** if you supply it to anyone else who wants it for free.
X**
X** BUGS: Assumes ASCII
X***********************************************************************
X** The following are the rules for airline coding:
X** AIRLINE SOUNDEX CODING
X**
X** Rules:
X** 1. Eliminate Double Consonants
X** 2. Eliminate Vowels
X** 3. If resulting code is greater than 6 char, use first and
X**    last three characters.
X***********************************************************************
X*/
X
X#include
X#include
X#include
X#include "config.h"
X
Xstatic char lookup[] = {
X    '0', /* A */
X    '1', /* B */
X    '2', /* C */
X    '3', /* D */
X    '0', /* E */
X    '1', /* F */
X    '2', /* G */
X    '0', /* H */
X    '0', /* I */
X    '2', /* J */
X    '2', /* K */
X    '4', /* L */
X    '5', /* M */
X    '5', /* N */
X    '0', /* O */
X    '1', /* P */
X    '0', /* Q */
X    '6', /* R */
X    '2', /* S */
X    '3', /* T */
X    '0', /* U */
X    '1', /* V */
X    '0', /* W */
X    '2', /* X */
X    '0', /* Y */
X    '2', /* Z */
X};
X
X
Xchar *r_soundex(to, from, n)
Xchar *to; /* Out: Place to put soundex code */
Xchar *from; /* In: String to be converted */
Xint n; /* In: Number of characters in result froming */
X{
X    register char *s;
X    register char *t;
X    char c;
X    char l;
X    char tolim[200];
X    char *to1 = &tolim[0];
X    char fromlim[200];
X    char *from1 = &fromlim[0];
X    char last;
X    long saveda;
X    long savedb;
X    int x, len;
X
XDBUG_ENTER( "r_soundex" );
X
X#ifdef DEFAULT_SOUNDEX
X    DBUG_PRINT("r_soundex",("DEFAULT SOUNDEX CODING"));
X    if (n <= 0)
X        n = 4; /* Default */
X
X    t = to;
X
X    for (s = from; ((c = *s) != '\0') && t < &to[n]; s++)
X    {
X        if (!isascii(c))
X            continue;
X        if (!isalpha(c))
X            continue;
X        c = toupper(c);
X        if (t == to)
X        {
X            l = *t++ = c;
X            continue;
X        }
X        c = lookup[c-'A'];
X        if (c != '0' && c != l)
X            l = *t++ = c;
X    }
X    while (t < &to[n])
X        *t++ = '0';
X    *t = '\0';
X
X    DBUG_RETURN(to);
X#endif
X
X
X#ifdef AIRLINE_SOUNDEX
X/* Airline Soundex */
X
X    DBUG_PRINT("r_soundex",("AIRLINE SOUNDEX CODING"));
X
X    /* Eliminate repeating consonants */
X    last = '\0';
X
X    saveda = (long)to1;
X    for (; *from != '\0'; ++from, ++to1 ) {
X        *from = (toupper(*from));
X        *to1 = *from;
X        if (*from == last)
X            *to1--;
X        last = *from;
X    }
X    *to1 = '\0';
X
X    /* reset to beginning for string */
X    to1 = (char *)saveda;
X    DBUG_PRINT("r_soundex",("step 1 -----to=%s\n",to1));
X
X    savedb = (long)from1;
X    /* Eliminate all vowels, except in the first postition, and all blanks */
X    for (x = 0; *to1 != '\0'; ++to1, ++from1 , x++) {
X        *from1 = *to1;
X
X        /* if not first position, and vowel, remove from string */
X        if (x && (*from1 == 'A' || *from1 == 'E' || *from1 == 'I' || *from1 == 'O' ||
X            *from1 == 'U') )
X            *from1--;
X
X        if (*from1 == ' ')
X            *from1--;
X    }
X    *from1 = '\0';
X    from1 = (char *)savedb;
X    to1 = (char *)saveda;
X
X    DBUG_PRINT("r_soundex",("step 2 ---- from1=%s len=%d\n",from1, strlen(from1)));
X
X    len = strlen(from1);
X    if (len > 6) {
X        /* save the first three chars */
X        for (x = 0; x < 3; x++) {
X            to1[x] = from1[x];
X        }
X
X        /* and the last three chars */
X        to1[3] = from1[(len-3)];
X
X        to1[4] = from1[(len-2)];
X
X        to1[5] = from1[(len-1)];
X
X        to1[6] = '\0';
X        strcpy(to, to1);
X
X    }
X    else /* length is not greater than 6 */
X        strcpy(to, from1);
X
X    DBUG_PRINT("r_soundex",("step 3 -----to=%s\n",to));
X
X    DBUG_RETURN(&to[0]);
X
X#endif
X}
X
X--
X  Naomi Walker (aka N7FSA)      naomi%anasaz.UUCP@asuvax.eas.asu.edu
X
X  Enthusiasm is caught,         In tennis,
X  not taught.                   love means nothing.
SHAR_EOF

echo "Done."
# Stop here so that anything appended after the archive (mail signatures,
# news trailers) is never executed.
exit 0