package search;
import java.util.Set;
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
import com.aliasi.tokenizer.LowerCaseTokenizerFactory;
import com.aliasi.tokenizer.StopTokenizerFactory;
import com.aliasi.tokenizer.Tokenizer;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.CollectionUtils;
import com.aliasi.util.Strings;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.File;
/**
 * Command-line utility that strips stop words from a search phrase.
 *
 * <p>The phrase is tokenized with LingPipe's Indo-European tokenizer, each
 * token is lower-cased, and every token that appears in the stop-word list
 * is dropped. The surviving tokens are printed to standard output.
 */
public class StopWords {

    /** Stop-word list, one word per line (list taken from ranks.nl per the original author's note). */
    private static final String STOP_WORD_FILE = "files/words.txt";

    /**
     * Reads the stop-word list, filters the search phrase and prints the result.
     *
     * @param args command-line arguments; {@code args[0]} is the search phrase
     *             (any further arguments are ignored, so quote multi-word phrases)
     * @throws FileNotFoundException if the stop-word file cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException {
        // Fail with a usage hint instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length == 0) {
            System.err.println("Usage: java search.StopWords \"<search phrase>\"");
            return;
        }

        String searchPhrase = args[0];
        System.out.println("Original search phrase: " + searchPhrase);

        File words = new File(STOP_WORD_FILE);
        Set<String> stopSet;
        try {
            stopSet = loadStopWords(words);
        } catch (FileNotFoundException e) {
            // A missing word list is part of main's declared contract: let it propagate.
            throw e;
        } catch (IOException e) {
            // A read failure part-way through the file: report it and stop without
            // filtering, rather than running with a partial stop list.
            System.err.println("Could not read stop words from " + words.getPath());
            e.printStackTrace();
            return;
        }

        // Pipeline: split into tokens -> lower-case -> drop stop words.
        TokenizerFactory base = IndoEuropeanTokenizerFactory.INSTANCE;
        TokenizerFactory lowerCased = new LowerCaseTokenizerFactory(base);
        TokenizerFactory stopFiltered = new StopTokenizerFactory(lowerCased, stopSet);
        displayTokens(searchPhrase, stopFiltered);
    }

    /**
     * Loads the stop words from {@code file}, one per line.
     *
     * <p>Each line is trimmed and blank lines are skipped: the tokens being
     * filtered never contain whitespace, so an untrimmed or empty entry could
     * never match anything.
     *
     * @param file the stop-word file to read
     * @return a mutable set containing every non-blank, trimmed line of the file
     * @throws FileNotFoundException if {@code file} cannot be opened for reading
     * @throws IOException if reading or closing the file fails
     */
    private static Set<String> loadStopWords(File file) throws IOException {
        // NOTE: FileReader decodes with the platform default charset.
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            Set<String> stopSet = CollectionUtils.asSet();
            String line;
            while ((line = reader.readLine()) != null) {
                String word = line.trim();
                if (word.length() > 0) {
                    stopSet.add(word);
                }
            }
            return stopSet;
        } finally {
            // Always release the file handle, even when a read fails.
            reader.close();
        }
    }

    /**
     * Tokenizes {@code in} with {@code tokFact} and prints the tokens on one line.
     *
     * <p>The output is {@code "Final search terms: "} followed by each token and
     * a single space, terminated by a line break.
     *
     * @param in      the text to tokenize
     * @param tokFact the factory that creates the tokenizer applied to {@code in}
     */
    public static void displayTokens(CharSequence in, TokenizerFactory tokFact) {
        System.out.print("Final search terms: ");
        char[] cs = Strings.toCharArray(in);
        Tokenizer tokenizer = tokFact.tokenizer(cs, 0, cs.length);
        for (String token : tokenizer) {
            System.out.print(token + " ");
        }
        // Terminate the line so following console output starts on a fresh line.
        System.out.println();
    }
}