/* Daniel Shiffman */ /* Bayesian Spam Filter Example */ /* Programming from A to Z */ /* Spring 2006 */ /* http://www.shiffman.net */ /* daniel.shiffman@nyu.edu */ // This class describes a word // and the various probabilities associated with that word package bayes; public class Word { private String word; // The String itself private int countCat1; // The total times it appears in "bad" messages private int countCat2; // The total times it appears in "good" messages private float rCat1; // bad count / total bad words private float rCat2; // good count / total good words private float pCat1; // probability this word is Spam // Create a word, initialize all vars to 0 public Word(String s) { word = s; countCat1 = 0; countCat2 = 0; rCat1 = 0.0f; rCat2 = 0.0f; pCat1 = 0.0f; } // Increment bad counter public void countCat1() { countCat1++; } // Increment good counter public void countCat2() { countCat2++; } // Computer how often this word is bad public void calcCat1Prob(int total) { if (total > 0) rCat1 = countCat1 / (float) total; } // Computer how often this word is good public void calcCat2Prob(int total) { if (total > 0) rCat2 = 2*countCat2 / (float) total; // multiply 2 to help fight against false positives (via Graham) } // Implement bayes rules to computer how likely this word is "spam" public void finalizeProb() { if (rCat2 + rCat1 > 0) pCat1 = rCat1 / (rCat1 + rCat2); if (pCat1 < 0.01f) pCat1 = 0.01f; else if (pCat1 > 0.99f) pCat1 = 0.99f; } // The "interesting" rating for a word is // How different from 0.5 it is public float interesting() { return Math.abs(0.5f - pCat1); } // Some getters and setters public float getPGood() { return rCat2; } public float getPBad() { return rCat1; } // Some getters and setters public int getGoodCount() { return countCat2; } public int getBadCount() { return countCat1; } public float getPCat1() { return pCat1; } public void setPCat1(float f) { pCat1 = f; } public String getWord() { return word; } }