BeautifulSoup

BeautifulSoup is an excellent Python library for parsing HTML content. It is invaluable when you are trying to extract content from web pages, and it can parse even badly formed HTML files with no trouble.

Example usage

import urllib2, re
from  BeautifulSoup import BeautifulSoup

class FlickrPickr(object):
    """Build the URLs of a Flickr photo's differently sized images.

    The constructor downloads the photo's ``sizes/sq/`` page, takes the first
    element whose ``src`` attribute ends in ``jpg`` and splits that image URL
    into three pieces stored on the instance:

    * ``baseurl`` -- everything before the final ``/`` of the image URL
    * ``id``      -- first ``_``-separated part of the file name (sans extension)
    * ``extra``   -- second ``_``-separated part of the file name

    The ``get_*`` methods reassemble those pieces with a size suffix.
    """

    def __init__(self, url):
        """Fetch and parse the square-size page for *url*.

        url -- photo page URL; ``'sizes/sq/'`` is appended verbatim, so it
               has to end with a slash.

        If no ``src`` ending in ``jpg`` is found, ``baseurl``, ``id`` and
        ``extra`` are left unset and the ``get_*`` methods raise
        AttributeError. A file name without an underscore raises IndexError.
        """
        self.url = url
        sq = '%ssizes/sq/' % (url, )
        page = urllib2.urlopen(sq)
        try:
            soup = BeautifulSoup(page)
        finally:
            # The markup has been consumed by the parser; release the
            # HTTP response instead of leaking it.
            page.close()
        p = soup.findAll(src=re.compile('jpg$'))
        if p:
            img = p[0]['src']
            bits = img.split('/')
            pic = bits[-1]
            self.baseurl = '/'.join(bits[:-1])
            # "<id>_<extra>_<size>.jpg" -> ["<id>", "<extra>", ...]
            bits = pic.split('.')[0].split('_')
            self.id = bits[0]
            self.extra = bits[1]

    def get_any(self, endbit):
        """Return the image URL whose file name ends in ``_<endbit>.jpg``."""
        return '/'.join([self.baseurl, '_'.join([self.id, self.extra, endbit+'.jpg'])])

    def get_square(self):
        """Return the URL built with the ``s`` size suffix."""
        return self.get_any('s')

    def get_thumb(self):
        """Return the URL built with the ``t`` size suffix."""
        return self.get_any('t')

    def get_small(self):
        """Return the URL built with the ``m`` size suffix."""
        return self.get_any('m')

    # Doesn't work :( -- left disabled.
    #def get_original(self):
    #    return self.get_any('o')

    def get_medium(self):
        """Return the URL with no size suffix: ``<id>_<extra>.jpg``."""
        return '/'.join([self.baseurl, '_'.join([self.id, self.extra+'.jpg'])])

def main():
    """Print the square, thumbnail, small and medium image URLs of a sample photo."""
    fp = FlickrPickr('http://flickr.com/photos/btbytes/2818112372/')

    # A parenthesised single-argument print gives the same output as a
    # Python 2 print statement and is also valid as the Python 3 function.
    print(fp.get_square())
    print(fp.get_thumb())
    print(fp.get_small())
    print(fp.get_medium())
    # get_original() is deliberately not called: that method is commented
    # out in FlickrPickr, so calling it would raise AttributeError.

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

§