Improve SHA1 link detection (#6526)
This improves the SHA1 link detection so that it no longer picks up extraneous non-whitespace characters at the end of the URL. A trailing '.' is handled as a special case in the code itself, because the regexp engine lacks lookahead support. Regex test cases: https://regex101.com/r/xUMlqh/3
This commit is contained in:
parent
0bdd81df9d
commit
2242a9f82e
|
@ -54,7 +54,7 @@ var (
|
||||||
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
|
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
|
||||||
|
|
||||||
// anySHA1Pattern allows to split url containing SHA into parts
|
// anySHA1Pattern allows to split url containing SHA into parts
|
||||||
anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})/?([^#\s]+)?(?:#(\S+))?`)
|
anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})(/[^#\s]+)?(#\S+)?`)
|
||||||
|
|
||||||
validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)
|
validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)
|
||||||
|
|
||||||
|
@ -594,31 +594,46 @@ func fullSha1PatternProcessor(ctx *postProcessCtx, node *html.Node) {
|
||||||
if m == nil {
|
if m == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// take out what's relevant
|
|
||||||
urlFull := node.Data[m[0]:m[1]]
|
urlFull := node.Data[m[0]:m[1]]
|
||||||
hash := node.Data[m[2]:m[3]]
|
text := base.ShortSha(node.Data[m[2]:m[3]])
|
||||||
|
|
||||||
var subtree, line string
|
// 3rd capture group matches an optional path
|
||||||
|
subpath := ""
|
||||||
// optional, we do them depending on the length.
|
|
||||||
if m[7] > 0 {
|
|
||||||
line = node.Data[m[6]:m[7]]
|
|
||||||
}
|
|
||||||
if m[5] > 0 {
|
if m[5] > 0 {
|
||||||
subtree = node.Data[m[4]:m[5]]
|
subpath = node.Data[m[4]:m[5]]
|
||||||
}
|
}
|
||||||
|
|
||||||
text := base.ShortSha(hash)
|
// 4th capture group matches an optional URL hash
|
||||||
if subtree != "" {
|
hash := ""
|
||||||
text += "/" + subtree
|
if m[7] > 0 {
|
||||||
}
|
hash = node.Data[m[6]:m[7]][1:]
|
||||||
if line != "" {
|
|
||||||
text += " ("
|
|
||||||
text += line
|
|
||||||
text += ")"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
replaceContent(node, m[0], m[1], createLink(urlFull, text))
|
start := m[0]
|
||||||
|
end := m[1]
|
||||||
|
|
||||||
|
// If url ends in '.', it's very likely that it is not part of the
|
||||||
|
// actual url but used to finish a sentence.
|
||||||
|
if strings.HasSuffix(urlFull, ".") {
|
||||||
|
end--
|
||||||
|
urlFull = urlFull[:len(urlFull)-1]
|
||||||
|
if hash != "" {
|
||||||
|
hash = hash[:len(hash)-1]
|
||||||
|
} else if subpath != "" {
|
||||||
|
subpath = subpath[:len(subpath)-1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if subpath != "" {
|
||||||
|
text += subpath
|
||||||
|
}
|
||||||
|
|
||||||
|
if hash != "" {
|
||||||
|
text += " (" + hash + ")"
|
||||||
|
}
|
||||||
|
|
||||||
|
replaceContent(node, start, end, createLink(urlFull, text))
|
||||||
}
|
}
|
||||||
|
|
||||||
// sha1CurrentPatternProcessor renders SHA1 strings to corresponding links that
|
// sha1CurrentPatternProcessor renders SHA1 strings to corresponding links that
|
||||||
|
|
|
@ -273,12 +273,12 @@ func TestRegExp_anySHA1Pattern(t *testing.T) {
|
||||||
testCases := map[string][]string{
|
testCases := map[string][]string{
|
||||||
"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": {
|
"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": {
|
||||||
"a644101ed04d0beacea864ce805e0c4f86ba1cd1",
|
"a644101ed04d0beacea864ce805e0c4f86ba1cd1",
|
||||||
"test/unit/event.js",
|
"/test/unit/event.js",
|
||||||
"L2703",
|
"#L2703",
|
||||||
},
|
},
|
||||||
"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": {
|
"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": {
|
||||||
"a644101ed04d0beacea864ce805e0c4f86ba1cd1",
|
"a644101ed04d0beacea864ce805e0c4f86ba1cd1",
|
||||||
"test/unit/event.js",
|
"/test/unit/event.js",
|
||||||
"",
|
"",
|
||||||
},
|
},
|
||||||
"https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": {
|
"https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": {
|
||||||
|
@ -288,13 +288,13 @@ func TestRegExp_anySHA1Pattern(t *testing.T) {
|
||||||
},
|
},
|
||||||
"https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": {
|
"https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": {
|
||||||
"0705be475092aede1eddae01319ec931fb9c65fc",
|
"0705be475092aede1eddae01319ec931fb9c65fc",
|
||||||
"src",
|
"/src",
|
||||||
"",
|
"",
|
||||||
},
|
},
|
||||||
"https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": {
|
"https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": {
|
||||||
"d8a994ef243349f321568f9e36d5c3f444b99cae",
|
"d8a994ef243349f321568f9e36d5c3f444b99cae",
|
||||||
"",
|
"",
|
||||||
"diff-2",
|
"#diff-2",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue