Amazon.frink

Download or view Amazon.frink in plain text format


// Breadth-first crawler over the category links found on www.amazon.com
// ".../zgbs/books/<id>" pages.  Every newly discovered category is printed
// once as "id<TAB>parentID<TAB>full title" and remembered in `hierarchy`.
class Amazon
{
   // Category objects still waiting to have their page fetched.
   // Items are pushed at the end and taken from the front.
   var queue = new array

   // Integer category IDs that have already been accepted by addQueue.
   var seen = new set

   // Dictionary of <category ID, Category> for every accepted category.
   var hierarchy = new dict

   // Fetches the page of each queued category until the queue is empty.
   // Reading a page may append further categories to the queue, so the
   // loop runs until no unseen categories are discovered any more.
   processQueue[] :=
   {
      while length[queue] > 0
      {
         category = queue.popFirst[]
         pageURL = "http://www.amazon.com/" + category.urlpart + "/zgbs/books/" + category.id + "/"
         readPage[pageURL, category.id]
//         println["processing " + category]
//         println[hierarchy]
//         println[length[queue] + " items in queue."]
      }
   }

   // Downloads `url` (passing "windows-1252" to read[]) and hands every
   // category link found in it to addQueue, with `parentID` recorded as
   // the category the link was found under.
   //
   // A link is a quoted http://www.amazon.com/<urlpart>/zgbs/books/<digits>...
   // URL followed by ">" and the link text; the three captures are the
   // URL part, the numeric ID and the link text (used as the title).
   readPage[url, parentID] :=
   {
      html = read[url, "windows-1252"]
//      println[html]
      // Earlier variant of the pattern, which required a "/" directly
      // after the numeric ID:
      // %r/['"]http:\/\/www\.amazon\.com\/([^\/]+)\/zgbs\/books\/(\d+)\/[^"']*?['"]\s*>([^<]+)/g
      matches = html =~ %r/['"]http:\/\/www\.amazon\.com\/([^\/]+)\/zgbs\/books\/(\d+)[^'"]*['"]\s*>([^<]+)/g
      for [urlpart, index, title] = matches
      {
         categoryID = parseInt[index]
         addQueue[urlpart, categoryID, parentID, title]
      }
   }

   // Registers a newly discovered category and schedules it for crawling.
   //
   //   urlpart  - path segment that precedes "/zgbs/books/" in the link
   //   id       - category ID (integer)
   //   parentID - ID of the category whose page contained the link
   //   title    - link text of the category
   //
   // Nothing happens if the ID was accepted before or if the category
   // links to itself (id equals parentID).
   addQueue[urlpart, id, parentID, title] :=
   {
      if seen.contains[id]
         return

      if id == parentID
         return

      seen.put[id]
//      println["Adding $title"]

      // Prefix the title with the parent's stored title.  That stored
      // title is itself a full title, so the result is the whole chain
      // of ancestors separated by " | ".
      fullTitle = title
      parent = hierarchy@parentID
      if parent != undef
         fullTitle = parent.title + " | " + title

      entry = new Category[urlpart, id, parentID, fullTitle]
      println["$id\t$parentID\t$fullTitle"]
      hierarchy@id = entry
      queue.push[entry]
   }
}

// Plain record describing one category discovered by the crawler.
class Category
{
   // Path segment that precedes "/zgbs/books/" in the category's URL.
   var urlpart

   // ID of this category.
   var id

   // ID of the category on whose page this one was found.
   var parentID

   // Title stored for this category, exactly as passed to the constructor.
   var title

   // Stores the four constructor arguments in the corresponding fields:
   // url -> urlpart, i -> id, parent -> parentID, t -> title.
   new[url, i, parent, t] :=
   {
      id = i
      parentID = parent
      urlpart = url
      title = t
   }
}

// Seed the crawl from the books best-sellers landing page (which has no
// parent category, hence undef), then follow every category it leads to.
crawler = new Amazon
seedURL = "http://www.amazon.com/gp/bestsellers/books/ref=sv_b_2"
crawler.readPage[seedURL, undef]
crawler.processQueue[]
//println[crawler.hierarchy]


Download or view Amazon.frink in plain text format


This is a program written in the programming language Frink.
For more information, view the Frink Documentation or see More Sample Frink Programs.

Alan Eliasen was born 20218 days, 0 hours, 14 minutes ago.