« Midterm Prototype Notes | Main | ICM Midterm Prep »

Scraping Googlism for Data

Assignment: Create a Processing applet that uses input from a text file or URL.

This applet mines the googlism.com website for text, based on the name that is typed in by the user.

Click here to see it in action >>

Following is the Processing code:

// Kate Monahan
// ICM, 2006
// this code scrapes googlism.com for data

// Font for all on-screen text; loaded in setup() and selected in draw()
PFont f;

// Text scraped from the googlism.com page; assigned in getGooglism()
String googleContent = "";
// Name being typed by the user (built up in keyPressed()); getGooglism()
// also overwrites it with the name scraped from the results page
String firstName = "";
// First googlism scraped from the page; draw() shows the input prompt while this is empty
String googlism1 = "";

// One-time initialisation: window size, an initial lookup, the font, and
// two debug lines on the console.
void setup() {
  size(250, 250);

  // Run a lookup right away with whatever firstName holds at this point
  getGooglism(firstName);

  // Font that draw() selects for every piece of text
  f = loadFont("OptimaLTStd-Medium-16.vlw");

  // Console output: the name and the result left behind by the lookup above
  println(firstName);
  println(googlism1);
}

// Renders one frame: either the name prompt with its input box, or the
// scraped googlism once a lookup has produced one.
void draw() {
  background(100);
  textFont(f);
  textMode(SCREEN);
  fill(255);
  noStroke();
  // White input box
  rect(10,60,200,20);

  // Compare String contents, not references: an empty String built at
  // runtime is not guaranteed to be the same object as the "" literal,
  // so == / != can misreport an empty result as a real one.
  if (googlism1.equals("")) {
    // No result yet: show the prompt and echo what has been typed so far
    text("Click below, type your first name & hit enter:",10,10,width-20,height-10);
    fill(0);
    text(firstName,12,63,width-10,height-10);
  }
  else {
    // Repaint the input box in the background colour so it disappears
    fill(100);
    rect(10,60,200,20);
    fill(255);
    text("Your #1 Googlism is: ",10,10,width-10,height-10);
    text("\"" + googlism1 + "\"",10,30,width-10,height-10);
    fill(200);
    text("Hit enter again to reset >>",10,230,width-10,height-10);
  }
}

// Fetches the googlism.com results page for a name (through the ITP proxy
// script) and stores what it scrapes in the globals googleContent,
// firstName and googlism1.
// zip: the name to look up (parameter name kept unchanged for existing callers).
void getGooglism(String zip) {
  // NOTE(review): in the published listing the marker text inside the
  // string literals below was lost (it shows up as a bare line break, as if
  // an HTML tag had been rendered). "<br>" is the presumed original --
  // confirm against the actual googlism.com page source before relying on it.
  String lineBreakTag = "<br>";

  // Use the argument rather than the global so the function looks up
  // exactly what the caller asked for.
  String url = "http://itp.nyu.edu/icm/proxy/proxy.php?"
    + "url=http://www.googlism.com/index.htm?ism=" + zip;

  // Get all the HTML source as an array of Strings (one element per line)
  String[] lines = loadStrings(url);

  // loadStrings() returns null when the page cannot be read; clear the
  // results instead of letting join() fail on a null array.
  if (lines == null) {
    googleContent = "";
    googlism1 = "";
    return;
  }

  // Get rid of the array and make it one very long String
  String xml = join(lines, " ");

  // Searching for the block that follows the "Googlism for:" heading
  String lookfor = "Googlism for:</span> ";
  String end = lineBreakTag + "\r\r\t\t\r\t\r\t<tr>\r\t\t\r\t\t<table class=";
  googleContent = giveMe(xml, lookfor, end);

  // Searching for first name
  end = lineBreakTag;
  firstName = giveMe(xml, lookfor, end);

  // Searching for first google response
  lookfor = lineBreakTag;
  end = lineBreakTag;
  googlism1 = giveMe(xml, lookfor, end);
}

// Returns the text of s that lies between the first occurrence of `before`
// and the next occurrence of `after`; returns "" when either marker is missing.
String giveMe(String s, String before, String after) {
  // Find the index of the beginning tag
  int start = s.indexOf(before);
  if (start == -1) {
    return "";
  }

  // Move to the end of the beginning tag
  start += before.length();

  // Find the index of the end tag, searching only after the beginning tag
  int end = s.indexOf(after, start);
  if (end == -1) {
    return "";
  }

  // Return the text in between
  return s.substring(start, end);
}

// Keyboard handler: builds up firstName as the user types and starts a
// lookup when the name is submitted.
void keyPressed() {
  // ENTER and RETURN are different characters depending on the platform,
  // so accept both as "submit".
  if (key == ENTER || key == RETURN) {
    getGooglism(firstName);
    firstName = "";
  }
  // Let the user correct typos instead of appending a control character
  else if (key == BACKSPACE) {
    if (firstName.length() > 0) {
      firstName = firstName.substring(0, firstName.length() - 1);
    }
  }
  // Coded keys (arrows, shift, ...) carry no printable character; skip them
  // and concatenate everything else onto the name.
  else if (key != CODED) {
    firstName = firstName + key;
  }
}

TrackBack

TrackBack URL for this entry:
http://itp.nyu.edu/~km63/cgi-bin/mt/mt-tb.cgi/10

Post a comment

(If you haven't left a comment here before, you may need to be approved by the site owner before your comment will appear. Until then, it won't appear on the entry. Thanks for waiting.)