Path: utzoo!utgpu!jarvis.csri.toronto.edu!mailrus!wasatch!cs.utexas.edu!uunet!mcvax!ukc!etive!aiai!richard From: richard@aiai.ed.ac.uk (Richard Tobin) Newsgroups: comp.unix.wizards Subject: Re: NFS, hung processes Keywords: NFS,hang,process,server,client Message-ID: <658@skye.ed.ac.uk> Date: 1 Aug 89 17:21:38 GMT References: <24D1DF49.7A5@marob.masa.com> <13134@bloom-beacon.MIT.EDU> Reply-To: richard@aiai.UUCP (Richard Tobin) Organization: AIAI, University of Edinburgh, Scotland Lines: 252 In article <13134@bloom-beacon.MIT.EDU> jik@athena.mit.edu (Jonathan I. Kamens) writes: > The most common way of referencing a dead NFS path even if you don't >realize you're doing it is if you have said path in your search path >and try to execute a program and/or start a new shell. Both will >cause the search path to be scanned, and they could encounter the dead >path and hang on it. > > One solution, which is what we use, is not to hard mount anything >but the most important NFS filesystems. Another solution is to mount the filesystems in, say, /nfs, and have symbolic links to them from the places people actually refer to. Then you can remove the symbolic links if the server is down. Even better, you can have a program do it. Here's one I wrote recently. We've only just started using it, so it may not be bug-free. -- Richard /* * nfslink [-i interval] [-t timeout] host name mountpt [name mountpt ...] * * maintain links to mounted file systems, removing them if the * remote machine isn't responding. * * Copyright Richard Tobin / AIAI 1989 * * May be freely redistributed if this whole notice remains intact. */ #include #include #include #include #include #include #include #include #include main(argc, argv) int argc; char **argv; { int c, interval = 20, timeout = 5, firsttime = 1; extern char *optarg; extern int optind, opterr; while((c = getopt(argc, argv, "i:t:")) != EOF) switch(c) { case 'i': interval = atoi(optarg); break; case 't': timeout = atoi(optarg); break; case '?': usage(); break; } if((argc - optind) < 3 || ((argc - optind) & 1) == 0) usage(); while(1) { if(nfscheck(argv[optind], timeout) == 0) makelinks(&argv[optind+1], firsttime); else removelinks(&argv[optind+1], firsttime); firsttime = 0; sleep(interval); } } makelinks(links, verbose) char **links; int verbose; { struct stat namestat; while(*links) { char *name = *links++; char *mountpt = *links++; if(lstat(name, &namestat) == -1) { if(errno == ENOENT) { if(symlink(mountpt, name) == -1) { perror("nfslink: symlink"); fatal("can't link %s to %s\n", name, mountpt); } printf("nfslink: linked %s to %s\n", name, mountpt); fflush(stdout); continue; } else { perror("nfslink: lstat"); fatal("can't lstat %s\n", name, 0); } } if((namestat.st_mode & S_IFMT) == S_IFLNK) { if(pointsto(name, mountpt)) { if(verbose) { printf("nfslink: %s is already linked to %s\n", name, mountpt); fflush(stdout); } } else { fatal("%s is a link, but not to %s\n", name, mountpt); } } else { fatal("%s exists, but is not a symbolic link\n", name, 0); } } } removelinks(links, verbose) char **links; int verbose; { struct stat namestat; while(*links) { char *name = *links++; char *mountpt = *links++; if(lstat(name, &namestat) == -1) { if(errno == ENOENT) { if(verbose) { printf("nfslink: link from %s to %s is already removed\n", name, mountpt); fflush(stdout); } continue; } else { perror("nfslink: lstat"); fatal("can't lstat %s\n", name, 0); } } if((namestat.st_mode & S_IFMT) == S_IFLNK) { if(pointsto(name, mountpt)) { if(unlink(name) == -1) { perror("nfslink: unlink"); fatal("can't remove link from %s to %s\n", name, mountpt); } printf("nfslink: removed link from %s to %s\n", name, mountpt); fflush(stdout); } else { fatal("%s is a link, but not to %s\n", name, mountpt); } } else { fatal("%s exists, but is not a symbolic link\n", name, 0); } } } int pointsto(name, target) char *name, *target; { /* We don't use stat lest it hang, so it's not quite right */ char buf[200]; int len; len = readlink(name, buf, sizeof(buf)-1); if(len == -1) { perror("nfslink: readlink"); fatal("can't read link %s\n", name, 0); } buf[len] = '\0'; return strcmp(buf, target) == 0; } fatal(fmt, arg1, arg2) char *fmt, *arg1, *arg2; { fprintf(stderr, "nfslink: fatal error: "); fprintf(stderr, fmt, arg1, arg2); exit(1); } usage() { fprintf(stderr, "usage: nfslink [-i interval] [-t timeout] host name mountpt [name mountpt ...]\n"); exit(2); } static jmp_buf env; void timedout(); int nfscheck(host, timeout) char *host; int timeout; { int stat; signal(SIGALRM, timedout); if(setjmp(env) == 0) { alarm(timeout); stat = callrpc(host, NFS_PROGRAM, NFS_VERSION, RFS_NULL, xdr_void, 0, xdr_void, 0); alarm(0); if(stat == 0) return 0; } return -1; } void timedout() { longjmp(env, 1); } -- Richard Tobin, JANET: R.Tobin@uk.ac.ed AI Applications Institute, ARPA: R.Tobin%uk.ac.ed@nsfnet-relay.ac.uk Edinburgh University. UUCP: ...!ukc!ed.ac.uk!R.Tobin