Scraping Googlism for Data
Assignment: Create a Processing applet that uses input from a text file or URL.
This applet mines the googlism.com website for text, based on the name that is typed in by the user.
Click here to see it in action >>
Following is the Processing code:
// Kate Monahan
// ICM, 2006
// this code scrapes googlism.com for data
PFont f; // Font loaded once in setup() and applied to all text in draw()
String googleContent = ""; // Text scraped after the "Googlism for:" label; assigned in getGooglism()
String firstName = ""; // Name typed so far by the user; also overwritten by getGooglism()
String googlism1 = ""; // First Googlism result; stays blank until a lookup finds one
// Runs once at startup: sizes the window, performs an initial lookup,
// loads the display font and prints the initial state to the console.
void setup() {
size(250,250);
// Initial lookup; firstName is still the empty string at this point
getGooglism(firstName);
// Font used by draw() for every piece of on-screen text
f = loadFont("OptimaLTStd-Medium-16.vlw");
// Console output of the values left behind by the initial lookup
println(firstName);
println(googlism1);
}
// Renders one frame. While no result is stored in googlism1 the sketch shows
// the prompt plus the name typed so far; once a result is stored it shows
// the result and a hint on how to reset.
void draw() {
  background(100);
  textFont(f);
  textMode(SCREEN);
  fill(255);
  noStroke();
  // White input box (painted over again below when a result is showing)
  rect(10,60,200,20);
  // Compare String contents with equals(): == and != only compare object
  // references, so an empty String built at runtime (for example by
  // substring()) could be mistaken for a real result.
  if (googlism1.equals("")) {
    text("Click below, type your first name & hit enter:",10,10,width-20,height-10);
    // Echo the typed name in black inside the white box
    fill(0);
    text(firstName,12,63,width-10,height-10);
  }
  else {
    // Hide the input box by repainting it in the background colour
    fill(100);
    rect(10,60,200,20);
    fill(255);
    text("Your #1 Googlism is: ",10,10,width-10,height-10);
    text("\"" + googlism1 + "\"",10,30,width-10,height-10);
    fill(200);
    text("Hit enter again to reset >>",10,230,width-10,height-10);
  }
}
// Fetches the googlism.com page for a name (through the proxy.php script in
// the URL below) and scrapes it with giveMe() into the sketch-level
// variables googleContent, firstName and googlism1.
// NOTE(review): the parameter zip is never read; the URL is built from the
// global firstName instead. Every caller in this file passes firstName, so
// the two currently coincide.
// NOTE(review): several string literals below are split across lines and
// some delimiters look blank; presumably HTML markup was stripped when the
// code was pasted into a web page. Restore the original delimiters from the
// sketch file before running.
void getGooglism(String zip) {
// Load the page source as an array of Strings (each line is one element
// in the array).
// Note that a line break has been inserted for formatting purposes
// here after the ? in the URL.
String url = "http://itp.nyu.edu/icm/proxy/proxy.php?
url=http://www.googlism.com/index.htm?ism=" + firstName;
String[] lines = loadStrings(url);
// Collapse the array into one very long String, lines separated by a space.
// NOTE(review): presumably loadStrings() yields null when the request
// fails, which would make join() throw; confirm and guard if so.
String xml = join(lines, " ");
// Grab the text that follows the Googlism for: label into googleContent
// (the earlier comment here mentioned weather; presumably a leftover)
String lookfor = "Googlism for:</span> ";
String end = "
\r\r\t\t\r\t\r\t<tr>\r\t\t\r\t\t<table class=";
googleContent = giveMe(xml,lookfor,end);
// Same start marker with a different end marker: extract the name.
// This overwrites the global firstName.
end = "
";
firstName = giveMe(xml,lookfor,end);
// Extract the first Googlism result into googlism1
lookfor = "
";
end = "
";
googlism1 = giveMe(xml,lookfor,end);
}
// Returns the part of s that lies between the first occurrence of before
// and the next occurrence of after that follows it.
//   s      - text to search
//   before - marker that precedes the wanted text
//   after  - marker that terminates the wanted text
// Returns a blank String when either marker is missing or when any
// argument is null.
String giveMe(String s, String before, String after) {
  // Nothing to search in (or for): same blank result as a failed search
  if (s == null || before == null || after == null) return "";
  // Find the index of the beginning tag
  int start = s.indexOf(before);
  // If we don't find anything, send back a blank String
  if (start == -1) return "";
  // Move to the end of the beginning tag
  start += before.length();
  // Find the index of the end tag, looking only past the beginning tag
  int end = s.indexOf(after, start);
  // If we don't find the end tag, send back a blank String
  if (end == -1) return "";
  // Return the text in between
  return s.substring(start, end);
}
// Keyboard handler: collects the typed name in firstName and, when
// Enter/Return is pressed, runs the lookup and clears the typed name.
void keyPressed() {
  // Enter arrives as '\n' (ENTER) on PCs and Unix but as '\r' (RETURN) on
  // Macs, so accept both. Send the String to the lookup and clear it.
  if (key == '\n' || key == '\r') {
    getGooglism(firstName);
    firstName = "";
  }
  // Backspace ('\b', Processing's BACKSPACE) removes the last typed
  // character instead of being appended to the name
  else if (key == '\b') {
    if (firstName.length() > 0) {
      firstName = firstName.substring(0, firstName.length() - 1);
    }
  }
  // Otherwise, concatenate the String with what the user types on the
  // keyboard. Keys without a character (Shift, arrows, ...) report
  // key == 0xffff (Processing's CODED) and are skipped.
  else if (key != 0xffff) {
    firstName = firstName + key;
  }
}
TrackBack
TrackBack URL for this entry:
http://itp.nyu.edu/~km63/cgi-bin/mt/mt-tb.cgi/10