import urllib2 import re import StringIO from BeautifulSoup import BeautifulSoup import portage from euscan import CONFIG, SCANDIR_BLACKLIST_URLS, BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS from euscan import helpers import euscan def scan_html(data, url, pattern): soup = BeautifulSoup(data) results = [] for link in soup.findAll('a'): href = link.get("href") if not href: continue if href.startswith(url): href = href.replace(url, "", 1) match = re.match(pattern, href, re.I) if match: results.append((match.group(1), match.group(0))) return results def scan_ftp(data, url, pattern): buf = StringIO.StringIO(data) results = [] for line in buf.readlines(): line = line.replace("\n", "").replace("\r", "") match = re.search(pattern, line, re.I) if match: results.append((match.group(1), match.group(0))) return results def scan_directory_recursive(cp, ver, rev, url, steps): if not steps: return [] url += steps[0][0] pattern = steps[0][1] steps = steps[1:] euscan.output.einfo("Scanning: %s" % url) try: fp = helpers.urlopen(url) except urllib2.URLError: return [] except IOError: return [] if not fp: return [] data = fp.read() results = [] if re.search("<\s*a\s+[^>]*href", data): results.extend(scan_html(data, url, pattern)) elif url.startswith('ftp://'): results.extend(scan_ftp(data, url, pattern)) versions = [] for version, path in results: if helpers.version_filtered(cp, ver, version): continue if not url.endswith('/') and not path.startswith('/'): path = url + '/' + path else: path = url + path versions.append((path, version)) if steps: ret = scan_directory_recursive(cp, ver, rev, path, steps) versions.extend(ret) return versions def scan(cpv, url): for bu in SCANDIR_BLACKLIST_URLS: if re.match(bu, url): euscan.output.einfo("%s is blacklisted by rule %s" % (url, bu)) return [] resolved_url = helpers.parse_mirror(url) if not resolved_url: return [] cp, ver, rev = portage.pkgsplit(cpv) # 'Hack' for _beta/_rc versions where _ is used instead of - if ver not in resolved_url: newver = helpers.version_change_end_sep(ver) if newver and newver in resolved_url: euscan.output.einfo("Version: using %s instead of %s" % (newver, ver)) ver = newver template = helpers.template_from_url(resolved_url, ver) if '${' not in template: euscan.output.einfo("Url doesn't seems to depend on version: %s not found in %s" % (ver, resolved_url)) return [] else: euscan.output.einfo("Scanning: %s" % template) steps = helpers.generate_scan_paths(template) return scan_directory_recursive(cp, ver, rev, "", steps) def brute_force(cpv, url): cp, ver, rev = portage.pkgsplit(cpv) url = helpers.parse_mirror(url) if not url: return [] for bp in BRUTEFORCE_BLACKLIST_PACKAGES: if re.match(bp, cp): euscan.output.einfo("%s is blacklisted by rule %s" % (cp, bp)) return [] for bp in BRUTEFORCE_BLACKLIST_URLS: if re.match(bp, url): euscan.output.einfo("%s is blacklisted by rule %s" % (cp, bp)) return [] euscan.output.einfo("Generating version from " + ver) components = helpers.split_version(ver) versions = helpers.gen_versions(components, CONFIG["brute-force"]) """ Remove unwanted versions """ for v in versions: if helpers.vercmp(cp, ver, helpers.join_version(v)) >= 0: versions.remove(v) if not versions: euscan.output.einfo("Can't generate new versions from " + ver) return [] template = helpers.template_from_url(url, ver) if '${PV}' not in template: euscan.output.einfo("Url doesn't seems to depend on full version: %s not found in %s" % (ver, url)) return [] else: euscan.output.einfo("Brute forcing: %s" % template) result = [] i = 0 done = [] while i < len(versions): components = versions[i] i += 1 if components in done: continue done.append(tuple(components)) version = helpers.join_version(components) if helpers.version_filtered(cp, ver, version): continue url = helpers.url_from_template(template, version) infos = helpers.tryurl(url, template) if not infos: continue result.append([url, version]) if len(result) > CONFIG['brute-force-false-watermark']: euscan.output.einfo("Broken server detected ! Skipping brute force.") return [] if CONFIG["brute-force-recursive"]: for v in helpers.gen_versions(list(components), CONFIG["brute-force"]): if v not in versions and tuple(v) not in done: versions.append(v) if CONFIG["oneshot"]: break return result def can_handle(cpv, url): return True