Changeset 1484
- Timestamp: 12/07/07 17:22:03 (9 months ago)
- Files:
  - phoneyc/trunk/honeyclient.py (modified) (4 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
phoneyc/trunk/honeyclient.py
r1483 r1484
 379  379      # build, store, return js and vbs content
 380  380      def __init__(self, contents, url):
      381          url = self.fixup(url)
 381  382          self.url = url
 382  383          self.baseurl = '/'.join(url.split('/')[:-1]) # for relative links
   …    …
 442  443          self.links = self.hrefs + self.iframes + self.objects + self.imgs + self.frames
 443  444
      445      def fixup(self, url):
      446          """
      447          fixes a URL that may lack a required trailing slash. without it
      448          we don't get proper relative URLs.
      449          """
      450          c = 0
      451          for i in url:
      452              if i == '/': c += 1
      453          if c < 3: return '%s/' % url
      454          else: return url
 444  455
 445  456  class PageParser(SGMLParser):
   …    …
 520  531
 521  532      def end_a(self):
      533          pass
      534
      535      def start_meta(self, attrs):
      536          for k, v in attrs:
      537              if k.lower() == 'content' and 'url=' in v.lower():
      538                  url = v.split(';')
      539                  for u in url:
      540                      if u.lower().startswith('url='):
      541                          url = re.sub('URL=', '', u, re.IGNORECASE)
      542                          url = url.replace('"', '')
      543                          if url not in self.hrefs: self.hrefs.append(url)
      544
      545      def end_meta(self):
 522  546          pass
 523  547
   …    …
 592  616          if self.js_inScript: self.js_body.append(text)
 593  617          if self.vbs_inScript: self.vbs_body.append(text)
 594
 595  618
 596  619  LINKS = ('http://www.google.com/', )
