#!/usr/bin/env python
# Written 2003 by Adam Gurno
# $Id: onlybody,v 1.1 2003/10/12 03:42:16 abg Exp $
# This file is in the public domain.
#
# Accepts a single HTML file as input and strips out everything not
# inside the <body> tags.  Everything inside is printed to stdout.
# See the post2advo script for suggested usage.
#
# The opening and closing tags are only recognised when each sits alone
# on its own line, spelled exactly as in BODY_OPEN / BODY_CLOSE below.
# There is very little error checking.  Know what you are doing, eh?

import sys

# NOTE(review): in the reviewed copy both marker literals were empty
# strings, which made the closing-tag branch unreachable.  They are
# restored here from the script name and the header comment -- confirm
# against the original revision.
BODY_OPEN = "<body>"
BODY_CLOSE = "</body>"


def extract_body(lines):
    """Yield the lines found between the body markers.

    ``lines`` is any iterable of text lines (an open file works).  One
    trailing newline is removed from each line before it is compared
    against the markers.  The marker lines themselves are never yielded.
    Yielded lines carry no trailing newline.
    """
    inside = False
    for raw_line in lines:
        # Only drop a newline that is really there; blindly slicing off
        # the last character would eat data on an unterminated last line.
        if raw_line.endswith("\n"):
            line = raw_line[:-1]
        else:
            line = raw_line

        if line == BODY_OPEN:
            inside = True
            continue
        if line == BODY_CLOSE:
            inside = False

        if inside:
            yield line


def main(argv=None):
    """Print the body of the HTML file named by ``argv[1]`` to stdout.

    ``argv`` defaults to ``sys.argv``.  Returns 0 on success, or 2 when
    no input file was given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write("usage: onlybody HTML_FILE\n")
        return 2

    # The context manager closes the file even if writing fails.
    with open(argv[1], "r") as html_file:
        for line in extract_body(html_file):
            # write() instead of the print statement so the script runs
            # unchanged under both Python 2 and Python 3.
            sys.stdout.write(line + "\n")
    return 0


if __name__ == "__main__":
    sys.exit(main())