From 50c95f1beffa3d80a0e636a28c23dd51a6bd7803 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20F=2E=20Sch=C3=B6nitzer?= Date: Sun, 17 Jun 2018 16:10:44 +0200 Subject: [PATCH] Remove all protocols and tlds on URL-cleaning Also make removal of protocol, tld and 'www' more robust against false positives by considering the position in the url. --- autoload/vimwiki/base.vim | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/autoload/vimwiki/base.vim b/autoload/vimwiki/base.vim index b1768c2..e13623a 100644 --- a/autoload/vimwiki/base.vim +++ b/autoload/vimwiki/base.vim @@ -1858,20 +1858,17 @@ endfunction function! s:clean_url(url) - let url = split(a:url, '/\|=\|-\|&\|?\|\.') + " remove protocol and tld + let url = substitute(a:url, '^\a\+://', '', '') + let url = substitute(url, '^\([^/]\+\).\a\{2,4}/', '\1/', '') + let url = split(url, '/\|=\|-\|&\|?\|\.') let url = filter(url, 'v:val !=# ""') - let url = filter(url, 'v:val !=# "www"') - let url = filter(url, 'v:val !=# "com"') - let url = filter(url, 'v:val !=# "org"') - let url = filter(url, 'v:val !=# "net"') - let url = filter(url, 'v:val !=# "edu"') - let url = filter(url, 'v:val !=# "http\:"') - let url = filter(url, 'v:val !=# "https\:"') - let url = filter(url, 'v:val !=# "file\:"') - let url = filter(url, 'v:val !=# "xml\:"') - let url = filter(url, 'v:val !=# "html"') - let url = filter(url, 'v:val !=# "htm"') - let url = filter(url, 'v:val !=# "php"') + if url[0] == "www" + let url = url[1:] + endif + if url[-1] =~ '^\(htm\|html\|php\)$' + let url = url[0:-2] + endif " remove words consisting of only hexadecimal digits or non-word characters let url = filter(url, 'v:val !~ "^\\A\\{4,}$"') let url = filter(url, 'v:val !~ "^\\x\\{4,}$" || v:val !~ "\\d"')