package search;

import java.util.Set;

import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
import com.aliasi.tokenizer.LowerCaseTokenizerFactory;
import com.aliasi.tokenizer.StopTokenizerFactory;
import com.aliasi.tokenizer.Tokenizer;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.CollectionUtils;
import com.aliasi.util.Strings;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.File;

/**
 * Command-line utility that removes stop words from a search phrase.
 *
 * <p>The phrase is tokenized with {@link IndoEuropeanTokenizerFactory}, passed
 * through a {@link LowerCaseTokenizerFactory} and then through a
 * {@link StopTokenizerFactory} built from the words listed in
 * {@code files/words.txt}. The surviving tokens are printed to standard output.
 */
public class StopWords {

	/**
	 * Stop-word list, one word per line, resolved against the working directory.
	 * The list was taken from ranks.nl.
	 */
	private static final String STOP_WORDS_PATH = "files/words.txt";

	/**
	 * Prints the original search phrase followed by the tokens that remain
	 * after the tokenizer chain has been applied.
	 *
	 * @param args command-line arguments; {@code args[0]} is the search phrase.
	 *             When it is missing a usage message is written to standard
	 *             error and the method returns without doing anything else.
	 * @throws FileNotFoundException if the stop-word file cannot be opened
	 */
	public static void main(String[] args) throws FileNotFoundException {
		// Guard against a missing argument instead of failing with
		// ArrayIndexOutOfBoundsException.
		if (args == null || args.length == 0) {
			System.err.println("Usage: java search.StopWords <search phrase>");
			return;
		}

		String searchPhrase = args[0];
		System.out.println("Original search phrase: "+searchPhrase);

		Set<String> stopSet;
		try {
			stopSet = loadStopWords(new File(STOP_WORDS_PATH));
		} catch (FileNotFoundException e) {
			// A missing stop-word file is propagated to the caller, as declared.
			throw e;
		} catch (IOException e) {
			// A read failure part-way through the file: report it and give up,
			// rather than filtering with an incomplete stop list.
			e.printStackTrace();
			return;
		}

		TokenizerFactory f1 = IndoEuropeanTokenizerFactory.INSTANCE;
		TokenizerFactory f2 = new LowerCaseTokenizerFactory(f1);
		TokenizerFactory f3 = new StopTokenizerFactory(f2,stopSet);

		displayTokens(searchPhrase,f3);
	}

	/**
	 * Reads the stop-word file into a set, one entry per non-blank line.
	 * Surrounding whitespace is trimmed from every line and blank lines are
	 * skipped. The reader is always closed, even when reading fails.
	 *
	 * @param file the stop-word file to read
	 * @return a mutable set holding the words found in the file
	 * @throws FileNotFoundException if the file cannot be opened
	 * @throws IOException if reading the file fails
	 */
	private static Set<String> loadStopWords(File file) throws IOException {
		Set<String> stopSet = CollectionUtils.<String>asSet();
		try (BufferedReader br = new BufferedReader(new FileReader(file))) {
			String line;
			while ((line = br.readLine()) != null) {
				String word = line.trim();
				if (!word.isEmpty()) {
					stopSet.add(word);
				}
			}
		}
		return stopSet;
	}

	/**
	 * Tokenizes the given text with the supplied factory and prints the
	 * resulting tokens on one line, prefixed by {@code "Final search terms: "}.
	 * Each token is followed by a single space and the line is terminated
	 * with a line separator.
	 *
	 * @param in      the text to tokenize
	 * @param tokFact the factory used to create the tokenizer
	 */
	public static void displayTokens(CharSequence in,TokenizerFactory tokFact) {
		System.out.print("Final search terms: ");

		char[] cs = Strings.toCharArray(in);
		Tokenizer tokenizer = tokFact.tokenizer(cs,0,cs.length);

		for (String token : tokenizer) {
			System.out.print(token+" ");
		}
		// Finish the line so the shell prompt or later output starts cleanly.
		System.out.println();
	}
}