mirror of
https://github.com/xpipe-io/xpipe.git
synced 2024-11-21 23:20:23 +00:00
Add charsetter project
This commit is contained in:
parent
420641fe51
commit
c6ffd11336
6 changed files with 155 additions and 0 deletions
14
charsetter/build.gradle
Normal file
14
charsetter/build.gradle
Normal file
|
@ -0,0 +1,14 @@
|
|||
// Build script of the charsetter module.

plugins {
    // Standard Java compilation and test support
    id 'java'
    // ModiTect plugin (Java module tooling)
    id 'org.moditect.gradleplugin' version '1.0.0-rc3'
}

// Shared build fragments kept under <root>/deps
apply from: "${rootDir}/deps/java.gradle"
apply from: "${rootDir}/deps/commons.gradle"
apply from: "${rootDir}/deps/junit.gradle"
apply from: "${rootDir}/deps/lombok.gradle"

configurations {
    // Everything in the 'dep' configuration is also visible at compile time.
    // NOTE(review): 'dep' is presumably declared by one of the applied scripts above - confirm.
    compileOnly.extendsFrom dep
}
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
package io.xpipe.charsetter;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
/**
 * Implemented by anything that has a character set associated with it.
 */
public interface Charsettable {

    /**
     * Returns the charset associated with this object.
     *
     * @return the associated charset
     */
    Charset getCharset();
}
|
74
charsetter/src/main/java/io/xpipe/charsetter/Charsetter.java
Normal file
74
charsetter/src/main/java/io/xpipe/charsetter/Charsetter.java
Normal file
|
@ -0,0 +1,74 @@
|
|||
package io.xpipe.charsetter;
|
||||
|
||||
import org.apache.commons.io.ByteOrderMark;
|
||||
import org.apache.commons.io.input.BOMInputStream;
|
||||
import org.apache.commons.lang3.function.FailableBiConsumer;
|
||||
import org.apache.commons.lang3.function.FailableSupplier;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.*;
|
||||
|
||||
public class Charsetter {
|
||||
|
||||
private static CharsetterUniverse universe;
|
||||
private static final int MAX_BYTES = 8192;
|
||||
|
||||
public static void init(CharsetterContext ctx) {
|
||||
universe = CharsetterUniverse.create(ctx);
|
||||
}
|
||||
|
||||
private static void checkInit() {
|
||||
if (universe == null) {
|
||||
throw new IllegalStateException("Charsetter not initialized");
|
||||
}
|
||||
}
|
||||
|
||||
public static Charset read(FailableSupplier<InputStream, Exception> in, FailableBiConsumer<InputStream, Charset, Exception> con) throws Exception {
|
||||
checkInit();
|
||||
|
||||
try (var is = in.get();
|
||||
var bin = new BOMInputStream(is)) {
|
||||
ByteOrderMark bom = bin.getBOM();
|
||||
String charsetName = bom == null ? null : bom.getCharsetName();
|
||||
var charset = charsetName != null ? Charset.forName(charsetName) : null;
|
||||
|
||||
if (charset == null) {
|
||||
bin.mark(MAX_BYTES);
|
||||
var bytes = bin.readNBytes(MAX_BYTES);
|
||||
bin.reset();
|
||||
charset = inferCharset(bytes);
|
||||
}
|
||||
|
||||
if (con != null) {
|
||||
con.accept(bin, charset);
|
||||
}
|
||||
return charset;
|
||||
}
|
||||
}
|
||||
|
||||
public static Charset inferCharset(byte[] content) {
|
||||
checkInit();
|
||||
|
||||
for (Charset c : universe.getCharsets()) {
|
||||
CharsetDecoder decoder = c.newDecoder();
|
||||
decoder.onMalformedInput(CodingErrorAction.REPORT);
|
||||
decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
|
||||
|
||||
ByteBuffer byteBuf = ByteBuffer.wrap(content);
|
||||
CharBuffer charBuf = CharBuffer.allocate(byteBuf.capacity() * 2);
|
||||
|
||||
CoderResult coderResult = decoder.decode(byteBuf, charBuf, false);
|
||||
if (coderResult != null) {
|
||||
if (coderResult.isError()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
return StandardCharsets.UTF_8;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
package io.xpipe.charsetter;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Value;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
@Value
|
||||
@AllArgsConstructor
|
||||
public class CharsetterContext {
|
||||
|
||||
String systemCharsetName;
|
||||
|
||||
Locale systemLocale;
|
||||
|
||||
Locale appLocale;
|
||||
|
||||
List<String> observedCharsets;
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
package io.xpipe.charsetter;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Value;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@Value
|
||||
@AllArgsConstructor
|
||||
public class CharsetterUniverse {
|
||||
|
||||
List<Charset> charsets;
|
||||
|
||||
public static CharsetterUniverse create(CharsetterContext ctx) {
|
||||
List<Charset> cs = new ArrayList<>();
|
||||
|
||||
cs.add(StandardCharsets.UTF_8);
|
||||
|
||||
var system = Charset.forName(ctx.getSystemCharsetName());
|
||||
cs.add(system);
|
||||
|
||||
// TODO: Locales
|
||||
|
||||
var observed = ctx.getObservedCharsets().stream().map(Charset::forName).toList();
|
||||
cs.addAll(observed);
|
||||
|
||||
return new CharsetterUniverse(cs);
|
||||
}
|
||||
}
|
7
charsetter/src/main/java/module-info.java
Normal file
7
charsetter/src/main/java/module-info.java
Normal file
|
@ -0,0 +1,7 @@
|
|||
module io.xpipe.charsetter {
|
||||
exports io.xpipe.charsetter;
|
||||
|
||||
requires org.apache.commons.io;
|
||||
requires org.apache.commons.lang3;
|
||||
requires static lombok;
|
||||
}
|
Loading…
Reference in a new issue