| 13 | | soup = BeautifulSoup(f) |
| 14 | | table = soup.body.findAll('tbody')[1].findAll('tr') |
| 15 | | for tr in table : |
| 16 | | row = tr.findAll('td') |
| 17 | | print "0 = %s\n\n\n" %row[0].contents[0] |
| 18 | | print "nbr = %s\n\n\n" %row[1].contents[0] |
| 19 | | print "title = %s\n\n\n" %row[2].a.contents[0] |
| 20 | | print "package = %s\n\n\n" %row[3].contents[0] |
| 21 | | print "importance = %s\n\n\n" %row[4].contents[0] |
| 22 | | print "status = %s\n\n\n" %row[5].contents[0] |
| | 15 | # here we have the main body |
| | 16 | # TODO: should do an extract here to reduce memory usage |
| | 17 | soup = BeautifulSoup(f).body.contents[5].div.div |
| | 18 | title = soup.h1.contents[0] |
| | 19 | tbody = soup.findAll('tbody')[0].findAll('td') |
| | 20 | product = tbody[0].a.contents[0] |
| | 21 | status = tbody[1].contents[0] |
| | 22 | importance = tbody[2].contents[0] |
| | 23 | assigned = tbody[3].a.contents[2] |
| | 24 | #print title |
| | 25 | #print product |
| | 26 | #print status |
| | 27 | #print importance |
| | 28 | #need to prettify assigned |
| | 29 | #print assigned |
| | 30 | content='' |
| | 31 | p = soup.findAll('div', recursive=0)[1].div.findAll('p', recursive=0) |
| | 32 | for i in p: |
| | 33 | #print i.renderContents() |
| | 34 | for j in i.contents: |
| | 35 | jj = j.string |
| | 36 | if jj != None and jj != "<br />" : |
| | 37 | print "#############################" |
| | 38 | newj = jj.strip().replace(" "," ") |
| | 39 | print str(newj) |
| | 40 | content = "%s\n%s"%(content,newj) |
| | 41 | #print newj |
| | 42 | #print j.renderContents() |
| | 43 | #print "#####################" |
| | 44 | content = "%s\n"%content |
| | 45 | #print content |
| | 46 | |
| | 47 | #p = soup.p |
| | 48 | #content = p.contents[0] |
| | 49 | #print p.name |
| | 50 | #p = p.next |
| | 51 | #print p |
| | 52 | #p = p.nextSibling.nextSibling |
| | 53 | #while p.name != 'form' : |
| | 54 | # print p.name |
| | 55 | # if p.name == 'p' : |
| | 56 | # content = content+'\n'+p.contents[0] |
| | 57 | # p = p.next |
| | 58 | #print content |
| | 59 | |
| | 60 | |