Compare commits

..

No commits in common. "5b9d44fee1b473ca66cb29464f8d24ad5e6c8dc6" and "b2cd013b091f990e139b6ec3b53db92e7d678eea" have entirely different histories.

9 changed files with 241 additions and 12 deletions

View File

@@ -1,15 +1,15 @@
repos: repos:
- repo: https://github.com/psf/black - repo: https://github.com/psf/black
rev: 24.2.0 rev: 23.11.0
hooks: hooks:
- id: black - id: black
- repo: https://github.com/PyCQA/isort - repo: https://github.com/PyCQA/isort
rev: 5.13.2 rev: 5.12.0
hooks: hooks:
- id: isort - id: isort
- repo: https://github.com/astral-sh/ruff-pre-commit - repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.0 rev: v0.1.4
hooks: hooks:
- id: ruff - id: ruff

10
TODO
View File

@@ -42,7 +42,11 @@ euscan
- remote-id type deb repository: - remote-id type deb repository:
-- find out how to get download url (not sure it's possible) -- find out how to get download url (not sure it's possible)
### bugs or unwanted behavior ### remote-id
- Propose new remote-id: deb
e.g.: <remote-id type="deb">
http://mysite.com/deb/dists/stable/main/binary-i386/Packages
</remote-id>
- Propose new remote-id: freecode
e.g.: <remote-id type="freecode">projectname</remote-id>
- Parsing docs and accepting 404's
-- net-analyzer/sensu

View File

@@ -51,7 +51,7 @@ BLACKLIST_PACKAGES = [
] ]
SCANDIR_BLACKLIST_URLS = [ SCANDIR_BLACKLIST_URLS = [
"https://rubygems.org/(.*)", # Not browsable "mirror://rubygems/(.*)", # Not browsable
"mirror://gentoo/(.*)", # Directory too big "mirror://gentoo/(.*)", # Directory too big
"https://dev.gentoo.org/(.*)", # There shouldn't be releases here "https://dev.gentoo.org/(.*)", # There shouldn't be releases here
# Waste of time to go through # Waste of time to go through

View File

@@ -0,0 +1,59 @@
# Copyright 2011 Corentin Chary <corentin.chary@gmail.com>
# Copyright 2020-2023 src_prepare group
# Distributed under the terms of the GNU General Public License v2
import re
import urllib.error
import urllib.parse
import urllib.request
import portage
from euscan import output
from euscan.handlers.url import process_scan as url_scan
from euscan.helpers import regex_from_template
# Handler identity and ranking consulted by the euscan handler registry.
HANDLER_NAME = "berlios"
CONFIDENCE = 90
PRIORITY = 90
# Matches mirror://berlios/<project>/<filename> SRC_URIs.
berlios_regex = r"mirror://berlios/([^/]+)/([^/]+)"
def can_handle(pkg, url=None):
    """Decide whether this handler applies to *url*.

    Truthy (a regex match object) only for a BerliOS mirror URL that
    embeds the package's version string; falsy otherwise.
    """
    if url:
        _cat, version, _rev = portage.pkgsplit(pkg.cpv)
        if version in url:
            return re.search(berlios_regex, url)
    return False
def scan_url(pkg, url, options):
    """Scan BerliOS for newer versions of *pkg*.

    Resolves the project's numeric group id from its BerliOS project
    page, then delegates to the generic URL handler with a
    version-templated filename pattern.

    Returns a list of (url, version, handler_name, confidence) tuples.
    """
    output.einfo("Using BerliOS handler")

    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    project, filename = re.search(berlios_regex, url).groups()

    project_page = "http://developer.berlios.de/projects/%s" % project
    # urlopen().read() returns bytes; decode before matching with a str
    # pattern, otherwise re.search() below raises TypeError.
    content = urllib.request.urlopen(project_page).read().decode(
        "utf-8", errors="replace"
    )

    project_id = re.search(r"/project/filelist.php\?group_id=(\d+)", content).group(1)

    base_url = (
        "http://developer.berlios.de/project/filelist.php?group_id=%s" % project_id
    )

    file_pattern = regex_from_template(filename.replace(ver, "${PV}"))

    result = url_scan(pkg, base_url, file_pattern)

    ret = []
    for found_url, pv, _, _ in result:
        # BerliOS "prdownload" pages redirect; rewrite to the direct form.
        found_url = found_url.replace("prdownload", "download")
        ret.append((found_url, pv, HANDLER_NAME, CONFIDENCE))
    return ret

View File

@@ -0,0 +1,53 @@
# Copyright 2011 Corentin Chary <corentin.chary@gmail.com>
# Copyright 2020-2023 src_prepare group
# Distributed under the terms of the GNU General Public License v2
import re
import urllib.error
import urllib.parse
import urllib.request
import portage
from euscan import helpers, mangling, output
# Handler identity and ranking consulted by the euscan handler registry.
HANDLER_NAME = "freecode"
CONFIDENCE = 100
PRIORITY = 90
def can_handle(pkg, url=None):
    """Never match on URLs.

    The freecode handler is selected explicitly through metadata
    (scan_pkg reads the project name from options["data"]), so URL-based
    dispatch is always declined.
    """
    return False
def scan_pkg(pkg, options):
    """Scan freecode.com for new releases of *pkg*.

    The freecode project name comes from options["data"].  For each
    release newer than the installed version, the release page and its
    download redirect page are followed to obtain the final URL.

    Returns a list of (url, version, handler_name, confidence) tuples.
    """
    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    package = options["data"].strip()

    output.einfo("Using FreeCode handler: " + package)

    fp = urllib.request.urlopen("http://freecode.com/projects/%s/releases" % package)
    # Decode the body: str(bytes) would produce a "b'...'" repr in which
    # single quotes are escaped as \', so the "doesn't" regex below could
    # never match.
    content = fp.read().decode("utf-8", errors="replace")

    result = re.findall(
        r'<a href="/projects/%s/releases/(\d+)">([^<]+)</a>' % package, content
    )

    ret = []
    for release_id, up_pv in result:
        pv = mangling.mangle_version(up_pv, options)
        if helpers.version_filtered(cp, ver, pv):
            continue
        fp = urllib.request.urlopen(
            f"http://freecode.com/projects/{package}/releases/{release_id}"
        )
        content = fp.read().decode("utf-8", errors="replace")
        download_page = re.findall(r'<a href="(/urls/[^"]+)"', content)[0]
        fp = urllib.request.urlopen("http://freecode.com%s" % download_page)
        content = fp.read().decode("utf-8", errors="replace")
        url = re.findall(
            r'In case it doesn\'t, click here: <a href="([^"]+)"', content
        )[0]
        ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
    return ret

View File

@@ -0,0 +1,66 @@
# Copyright 2011 Corentin Chary <corentin.chary@gmail.com>
# Copyright 2020-2023 src_prepare group
# Distributed under the terms of the GNU General Public License v2
import json
import re
import urllib.error
import urllib.parse
import urllib.request
import portage
from euscan import helpers, mangling, output
# Handler identity and ranking consulted by the euscan handler registry.
HANDLER_NAME = "github"
CONFIDENCE = 100
PRIORITY = 90
def can_handle(pkg, url=None):
    """Truthy iff *url* is a "mirror://github/" SRC_URI; *pkg* is unused."""
    if not url:
        # Preserve the original falsy value (None or "") for truth-testing callers.
        return url
    return url.startswith("mirror://github/")
def guess_package(cp, url):
    """Split a mirror://github URL into its (user, project, filename) parts."""
    m = re.search(r"^mirror://github/(.*?)/(.*?)/(.*)$", url)
    assert m
    user, project, filename = m.groups()
    return (user, project, filename)
def scan_url(pkg, url, options):
    """Scan GitHub downloads for new versions of *pkg*.

    Uses the GitHub "downloads" API
    (http://developer.github.com/v3/repos/downloads/).

    Returns a list of (url, version, handler_name, confidence) tuples.
    """
    user, project, filename = guess_package(pkg.cpv, url)

    # find out where version is expected to be found
    cp, ver, rev = portage.pkgsplit(pkg.cpv)
    if ver not in filename:
        # Return an empty list (not None) so callers can always iterate.
        return []

    # now create a filename-matching regexp
    # XXX: supposedly replace first with (?P<foo>...)
    # and remaining ones with (?P=foo)
    fnre = re.compile("^%s$" % re.escape(filename).replace(re.escape(ver), "(.*?)"))

    # Log the actual filename rather than a "(unknown)" placeholder.
    output.einfo(
        f"Using github API for: project={project} user={user} filename={filename}"
    )

    dlreq = urllib.request.urlopen(
        f"https://api.github.com/repos/{user}/{project}/downloads"
    )
    dls = json.load(dlreq)

    ret = []
    for dl in dls:
        m = fnre.match(dl["name"])
        if m:
            pv = mangling.mangle_version(m.group(1), options)
            if helpers.version_filtered(cp, ver, pv):
                continue
            found = mangling.mangle_url(dl["html_url"], options)
            ret.append((found, pv, HANDLER_NAME, CONFIDENCE))
    return ret

View File

@@ -0,0 +1,47 @@
# Copyright 2011 Corentin Chary <corentin.chary@gmail.com>
# Copyright 2020-2023 src_prepare group
# Distributed under the terms of the GNU General Public License v2
import re
import portage
from euscan import output
from euscan.handlers.url import process_scan as url_scan
from euscan.helpers import regex_from_template
# Handler identity and ranking consulted by the euscan handler registry.
HANDLER_NAME = "google-code"
CONFIDENCE = 90
PRIORITY = 90
# Captures the project name from a <project>.googlecode.com file URL.
package_name_regex = r"http://(.+).googlecode.com/files/.+"
def can_handle(pkg, url=None):
    """Decide whether this handler applies to *url*.

    Truthy (a regex match object) only for a googlecode.com file URL
    that embeds the package's version string; falsy otherwise.
    """
    if url:
        _cat, version, _rev = portage.pkgsplit(pkg.cpv)
        if version in url:
            return re.match(package_name_regex, url)
    return False
def scan_url(pkg, url, options):
    """Scan the Google Code downloads listing for new versions of *pkg*.

    Delegates to the generic URL handler with a version-templated
    filename pattern and re-tags the results with this handler's name
    and confidence.

    Returns a list of (url, version, handler_name, confidence) tuples.
    """
    output.einfo("Using Google Code handler")

    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    project = re.match(package_name_regex, url).group(1)
    listing_url = "http://code.google.com/p/%s/downloads/list" % project

    filename = url.split("/")[-1]
    pattern = regex_from_template(filename.replace(ver, "${PV}"))

    matches = url_scan(pkg, listing_url, pattern)
    return [(found, pv, HANDLER_NAME, CONFIDENCE) for found, pv, _, _ in matches]

View File

@@ -17,11 +17,11 @@ PRIORITY = 90
def can_handle(pkg, url=None): def can_handle(pkg, url=None):
return url and url.startswith("https://files.pythonhosted.org/packages/source/p/") return url and url.startswith("mirror://pypi/")
def guess_package(cp, url): def guess_package(cp, url):
match = re.search(r"https://files.pythonhosted.org/packages/source/p/(.*)/.*", url) match = re.search(r"mirror://pypi/\w+/(.*)/.*", url)
if match: if match:
return match.group(1) return match.group(1)

View File

@@ -1,5 +1,5 @@
# Copyright 2011 Corentin Chary <corentin.chary@gmail.com> # Copyright 2011 Corentin Chary <corentin.chary@gmail.com>
# Copyright 2020-2024 src_prepare group # Copyright 2020-2023 src_prepare group
# Distributed under the terms of the GNU General Public License v2 # Distributed under the terms of the GNU General Public License v2
import json import json
@@ -18,11 +18,11 @@ PRIORITY = 90
def can_handle(pkg, url=None): def can_handle(pkg, url=None):
return url and url.startswith("https://rubygems.org/") return url and url.startswith("mirror://rubygems/")
def guess_gem(cpv, url): def guess_gem(cpv, url):
match = re.search("https://rubygems.org/gems/(.*).gem", url) match = re.search("mirror://rubygems/(.*).gem", url)
if match: if match:
cpv = "fake/%s" % match.group(1) cpv = "fake/%s" % match.group(1)