home *** CD-ROM | disk | FTP | other *** search
- Xref: sparky sci.math:13289 sci.physics:16620
- Path: sparky!uunet!portal!lll-winken!sun-barr!news2me.EBay.Sun.COM!west.West.Sun.COM!smaug.West.Sun.COM!richard
- From: Richard.Mathews@West.Sun.COM (Richard M. Mathews)
- Newsgroups: sci.math,sci.physics
- Subject: Re: "The Universe of MOTION" (crackpot index)
- Message-ID: <1bkpg2INNmt1@smaug.West.Sun.COM>
- Date: 15 Oct 92 21:57:22 GMT
- References: <1992Oct13.191813.1751@cnsvax.uwec.edu> <1992Oct14.194338.8696@galois.mit.edu> <Oct.15.08.35.20.1992.24954@paul.rutgers.edu> <1992Oct15.190145.18782@galois.mit.edu>
- Organization: Sunsoft Inc., Los Angeles, CA.
- Lines: 327
- NNTP-Posting-Host: astro
- Originator: richard@astro.West.Sun.COM
-
- jbaez@riesz.mit.edu (John C. Baez) writes:
-
- >I want the index to be easy to compute. In fact, what I'd really like
- >would be an index that could be computed automatically.
-
- Ask and ye shall receive. Just before reading your posting, I completed
- just such a program. My simplified rules are:
- 100 points each for words in all caps. To count as a word, it
- must include at least one consonant and one vowel (to avoid the
- words "I" and "A" and to weed out many acronyms). It must also
- include at least one letter which is not F A or Q. Digits and
- '@' are treated as lower case letters to weed out login names,
- host names, and more acronyms.
-
- N consecutive exclamation points count for (N-1) * 100 points.
-
- 100 points for any word which includes a substring which matches
- an entry in the word list which is at the top of the program (see
- below). Comparisons are done ignoring case.
-
- No negative base value is used on the theory that we must all have some
- crackpot in us.
-
- Lines beginning with '>' don't count. Lines in the article header don't
- count except for "Subject:" and "Keywords:". Subject lines don't count
- if the subject begins with "Re:".
-
- The crackpot index is calculated by taking the score determined as above
- and dividing by the number of lines examined. The program prints the
- index followed by the name from the "From:" line.
-
- I ran this program on every article in our sci.physics spool (about 2
- weeks' worth) and then averaged the indices for each author. The top 10
- winners are:
-
- 63.0769 Joe Dellinger
- 64.7059 Khan
- 65 FRED W. BACH
- 73.3183 Alexander Abian
- 87.8875 mcelwre@cnsvax.uwec.edu
- 107.692 Dani Eder
- 113.755 John Hagerman
- 133.333 Scott Brigham, corp
- 182.353 <RVESTERM@vma.cc.nd.edu>
- 319.444 Brad Wallet
-
- The prize for best crackpot spoof goes to Brad Wallet. Hagerman had a
- chance at this prize, but blew it by submitting a couple of serious articles.
- Other sci.physics celebrities (defined loosely as those whose name I
- recognized in a quick scan of the list) got the following scores:
-
- 2.14634 Paul Budnik
- 2.56065 Steve Carlip
- 3.18687 Benjamin Weiner
- 3.81494 RING, DAVID WAYNE
- 4.23726 Cameron Randale Bass
- 5.13109 Douglas G. Danforth
- 5.66667 Tom Van Flandern
- 5.79702 Robert Firth
- 6.61422 Leigh Palmer
- 6.66667 Dr. Norman J. LaFave <lafave@ial4.jsc.nasa.gov>
- 7.21700 Ric Peregrino
- 7.60326 Mcirvin
- 7.89399 Matt Austern
- 8.72341 Bronis Vidugiris
- 8.75751 Daryl McCullough
- 9.08969 Hartmut Frommert
- 9.80049 Jim Carr
- 10.4906 John C. Baez
- 10.5006 hporopudas@tnclus.tele.nokia.fi
- 10.6189 Richard M. Mathews
- 11.1111 snarfy@cruzio.santa-cruz.ca.us
- 11.8265 SCOTT I CHASE
- 15.0000 Matthew P Wiener
- 15.4365 Blair P. Houghton
- 18.1966 Mcinnes B T (Dr)
- 20.1698 Terry Bollinger
- 25.6695 Jack Sarfatti
-
- There are a few scores which I am disappointed came out so low and a few
- which I am disappointed came out so high. Overall, though, this seems
- pretty good.
-
- So here is the program. It reads a single article on standard input.
- The shell script to pass each article through this program and to use
- awk or some such tool to do the averaging is left as an exercise for
- the reader.
-
- /*
- * crackpot.c written by Richard M. Mathews Richard.Mathews@West.Sun.COM
- * version 0.9
- */
#include <ctype.h>
#include <stdio.h>
#include <string.h>	/* strncmp, strlen, strcpy */
#include <unistd.h>	/* getopt */
-
- char *words[] = {
- "conspiracy",
- "status quo",
- "communication",
- "transmit",
- "ftl",
- "hawking",
- "einstein",
- "newton",
- "galileo",
- "penrose",
- "physicists", /* as in "all them physicists are out to get me" */
- "scientists",
- "genius",
- "super", /* superior and super-anything (incl. collider;-) */
- "principle", /* just seems to be common in crackpot articles */
- "b.s",
- "m.a",
- "ph.d",
- "phd",
- "dr.",
- "provok", /* variants of provoke and provocation */
- "provoc",
- "arrow",
- "according",
- };
-
- char *getline();
- extern char *malloc(), *realloc();
-
- char *name;
-
- main(argc, argv)
- int argc;
- char **argv;
- {
- int nwords, i, lines = 0, score = 0;
- char **ptrs;
- int c;
- int all_caps = 1, vowel = 0, consonant = 0, non_faq = 0, bang = 0;
- int debug = 0;
-
- while ((c = getopt(argc, argv, "d")) != -1) {
- switch (c) {
- case 'd':
- ++debug;
- break;
- default:
- fprintf(stderr, "usage: crackpot [-d] < article\n");
- exit(2);
- }
- }
-
- nwords = sizeof(words) / sizeof(words[0]) - 1;
-
- ptrs = (char **) malloc(nwords * sizeof(*ptrs));
- if (ptrs == NULL) {
- fprintf(stderr, "Can't alloc word table\n");
- exit(1);
- }
- for (i = 0; i < nwords; ++i)
- ptrs[i] = words[i];
-
- while ((c = nextchar()) != EOF) {
- if (c == '\n')
- ++lines;
- /*
- * Check whether this character helps us advance our pointer
- * into any of the words on the word list.
- */
- for (i = 0; i < nwords; ++i) {
- if (*ptrs[i] == c
- || isupper(c) && *ptrs[i] == tolower(c)) {
- ++ptrs[i];
- if (*ptrs[i] == '\0') {
- ++score;
- if (debug)
- printf("%s\n", words[i]);
- ptrs[i] = words[i];
- }
- }
- else
- ptrs[i] = words[i];
- }
- /*
- * Check whether this word might be all caps (but it
- * must contain at least one vowel and one consonant to
- * count as a word). Mixtures of caps with digits are
- * not counted as they are likely to be login names,
- * acronyms, etc. Words connected with an '@' aren't
- * counted as they are probably login or host names.
- * Words made up only from the letters F A and Q are
- * not counted.
- */
- if (isalpha(c) || isdigit(c) || c == '@') {
- if (isupper(c)) {
- switch (c) {
- case 'A':
- case 'E':
- case 'I':
- case 'O':
- case 'U':
- ++vowel;
- break;
- default:
- ++consonant;
- break;
- }
- switch (c) {
- case 'F':
- case 'A':
- case 'Q':
- break;
- default:
- ++non_faq;
- }
- }
- else { /* lower case or digit or at-sign */
- all_caps = 0;
- }
- }
- else {
- if (all_caps && vowel && consonant && non_faq) {
- ++score;
- if (debug)
- printf("CAPS\n");
- }
- all_caps = 1;
- vowel = consonant = non_faq = 0;
- }
- /*
- * Check for consecutive exclamation points.
- */
- if (c == '!') {
- if (bang) {
- ++score;
- if (debug)
- printf("bangs\n");
- }
- bang = 1;
- }
- else
- bang = 0;
- }
- if (all_caps && vowel && consonant && non_faq) {
- ++score;
- if (debug)
- printf("CAPS\n");
- }
- printf("%f %s", lines ? 100. * score / lines : 0., name ? name : "");
- exit(0);
- }
-
- nextchar()
- {
- static char *buf;
- static body = 0; /* body of article? (as opposed to header) */
-
- while (buf == NULL || *buf == '\0') {
- buf = getline();
- if (buf == NULL)
- return EOF;
- else if (*buf == '>') /* ignore included text */
- buf = NULL;
- else if (*buf == '\n') /* start of body */
- body = 1;
- else if (body)
- ;
- else if (strncmp("Subject: Re:", buf, 12) == 0)
- buf = NULL; /* ignore "Subject" on responses */
- else if (strncmp("Subject: ", buf, 9) == 0)
- ; /* don't ignore "Subject" otherwise */
- else if (strncmp("Keywords: ", buf, 10) == 0)
- ; /* don't ignore "Keywords" */
- else { /* all other header lines */
- if (strncmp("From: ", buf, 6) == 0) {
- name = malloc(strlen(buf) - 5);
- strcpy(name, buf + 6); /* remember name */
- }
- buf = NULL; /* ignore header lines */
- }
- }
-
- return *buf++;
- }
-
- char *
- getline()
- {
- static char *buf;
- static int buflen;
- char *bufp;
-
- if (feof(stdin)) {
- return NULL;
- }
-
- if (buf == NULL) {
- buf = malloc(BUFSIZ);
- if (buf == NULL) {
- fprintf(stderr, "Can't alloc line buffer\n");
- exit(1);
- }
- buflen = BUFSIZ;
- }
-
- buf[buflen - 2] = '\0';
- bufp = buf;
- for (;;) {
- if (fgets(bufp, BUFSIZ, stdin) == NULL) {
- if (bufp == buf)
- return NULL;
- else
- break;
- }
- if (buf[buflen - 2] != '\0' && buf[buflen - 2] != '\n') {
- /* need to read more */
- buf = realloc(buf, buflen + BUFSIZ - 1);
- if (buf == NULL) {
- fprintf(stderr, "Can't realloc line buffer\n");
- exit(1);
- }
- bufp = buf + buflen - 1; /* point to the '\0' */
- buflen += BUFSIZ - 1;
- buf[buflen - 2] = '\0';
- }
- else
- break;
- }
-
- return buf;
- }
-