diff --git a/src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java b/src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java index 8998525cc..2b9f24f82 100644 --- a/src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java +++ b/src/main/java/org/codelibs/fess/crawler/transformer/FessTransformer.java @@ -15,6 +15,7 @@ */ package org.codelibs.fess.crawler.transformer; +import java.net.URI; import java.net.URLDecoder; import java.util.Arrays; import java.util.Collections; @@ -182,8 +183,7 @@ public interface FessTransformer { return StringUtil.EMPTY; } - String u = decodeUrlAsName(url, url.startsWith("file:")); - + String u = url; int idx = u.lastIndexOf('?'); if (idx >= 0) { u = u.substring(0, idx); @@ -193,7 +193,7 @@ public interface FessTransformer { if (idx >= 0) { u = u.substring(0, idx); } - + u = decodeUrlAsName(u, u.startsWith("file:")); idx = u.lastIndexOf('/'); if (idx >= 0) { if (u.length() > idx + 1) { @@ -262,4 +262,4 @@ public interface FessTransformer { } return null; } -} \ No newline at end of file +} diff --git a/src/main/java/org/codelibs/fess/ds/impl/GitBucketDataStoreImpl.java b/src/main/java/org/codelibs/fess/ds/impl/GitBucketDataStoreImpl.java index c800f9367..337c67ee5 100644 --- a/src/main/java/org/codelibs/fess/ds/impl/GitBucketDataStoreImpl.java +++ b/src/main/java/org/codelibs/fess/ds/impl/GitBucketDataStoreImpl.java @@ -16,7 +16,8 @@ package org.codelibs.fess.ds.impl; import java.io.InputStream; -import java.io.UnsupportedEncodingException; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URLEncoder; import java.util.ArrayList; import java.util.Collections; @@ -224,7 +225,7 @@ public class GitBucketDataStoreImpl extends AbstractDataStoreImpl { } protected String getGitRef(final String rootURL, final String authToken, final String owner, final String name, final String branch) { - final String url = rootURL + "api/v3/repos/" + owner + "/" + name + "/git/refs/heads/" + encode(branch); + final String url = encode(rootURL, "api/v3/repos/" + owner + "/" + name + "/git/refs/heads/" + branch, null); try (CurlResponse curlResponse = Curl.get(url).header("Authorization", "token " + authToken).execute()) { final Map map = curlResponse.getContentAsMap(); @@ -268,8 +269,8 @@ public class GitBucketDataStoreImpl extends AbstractDataStoreImpl { final String name, final String refStr, final List roleList, final String path, final CrawlingConfig crawlingConfig, final IndexUpdateCallback callback, final Map paramMap, final Map scriptMap, final Map defaultDataMap) { - final String apiUrl = rootURL + "api/v3/repos/" + owner + "/" + name + "/contents/" + path; - final String viewUrl = rootURL + owner + "/" + name + "/blob/" + refStr + "/" + path; + final String apiUrl = encode(rootURL, "api/v3/repos/" + owner + "/" + name + "/contents/" + path, null); + final String viewUrl = encode(rootURL, owner + "/" + name + "/blob/" + refStr + "/" + path, null); if (logger.isInfoEnabled()) { logger.info("Get a content from " + apiUrl); @@ -405,7 +406,7 @@ public class GitBucketDataStoreImpl extends AbstractDataStoreImpl { return; } - final String url = rootURL + "api/v3/repos/" + owner + "/" + name + "/contents/" + encode(path) + "?ref=" + refStr; + final String url = encode(rootURL, "api/v3/repos/" + owner + "/" + name + "/contents/" + path, "ref=" + refStr); try (CurlResponse curlResponse = Curl.get(url).header("Authorization", "token " + authToken).execute()) { final InputStream iStream = curlResponse.getContentAsStream(); @@ -414,8 +415,7 @@ public class GitBucketDataStoreImpl extends AbstractDataStoreImpl { for (int i = 0; i < fileList.size(); ++i) { @SuppressWarnings("unchecked") final Map file = (Map) fileList.get(i); - final String fname = encode(file.get("name")); - final String newPath = path.isEmpty() ? fname : path + "/" + fname; + final String newPath = path.isEmpty() ? file.get("name") : path + "/" + file.get("name"); switch (file.get("type")) { case "file": consumer.accept(newPath); @@ -433,13 +433,19 @@ public class GitBucketDataStoreImpl extends AbstractDataStoreImpl { } } - private String encode(final String s) { + private String encode(final String rootURL, final String path, final String query) { try { - final String encoded = URLEncoder.encode(s, Constants.UTF_8); - return encoded; - } catch (UnsupportedEncodingException e) { - logger.warn("Failed to encode \"" + s + "\"", e); - return s; + final URI rootURI = new URI(rootURL); + final URI uri = + new URI(rootURI.getScheme(), rootURI.getUserInfo(), rootURI.getHost(), rootURI.getPort(), rootURI.getPath() + path, + query, null); + return uri.toASCIIString(); + } catch (final URISyntaxException e) { + logger.warn("Failed to parse " + rootURL + path + "?" + query, e); + if (StringUtil.isEmpty(query)) { + return rootURL + path; + } + return rootURL + path + "?" + query; } } } diff --git a/src/test/java/org/codelibs/fess/crawler/transformer/FessFileTransformerTest.java b/src/test/java/org/codelibs/fess/crawler/transformer/FessFileTransformerTest.java index 6d48c7bf5..bbe97629a 100644 --- a/src/test/java/org/codelibs/fess/crawler/transformer/FessFileTransformerTest.java +++ b/src/test/java/org/codelibs/fess/crawler/transformer/FessFileTransformerTest.java @@ -59,6 +59,23 @@ public class FessFileTransformerTest extends UnitFessTestCase { url = "file://C .doc"; exp = "file://C .doc"; assertEquals(exp, transformer.decodeUrlAsName(url, true)); + + url = "http://example.com/foo/" + encodeUrl("#") + "/@@bar/index.html#fragment?foo=bar"; + exp = "http://example.com/foo/#/@@bar/index.html#fragment?foo=bar"; + assertEquals(exp, transformer.decodeUrlAsName(url, false)); + } + + public void test_getFileName_ok() throws Exception { + String url, exp; + final FessFileTransformer transformer = createInstance(); + + url = "http://example.com/" + encodeUrl("#") + "/@@bar/index.html#fragment?foo=bar"; + exp = "index.html"; + assertEquals(exp, transformer.getFileName(url, Constants.UTF_8)); + + url = "http://example.com/" + encodeUrl("#") + "/@@folder/test.txt"; + exp = "test.txt"; + assertEquals(exp, transformer.getFileName(url, Constants.UTF_8)); } public void test_decodeUrl_null() throws Exception {