Add charsetter project

This commit is contained in:
Christopher Schnick 2022-05-20 13:30:29 +02:00
parent 420641fe51
commit c6ffd11336
6 changed files with 155 additions and 0 deletions

14
charsetter/build.gradle Normal file
View file

@ -0,0 +1,14 @@
// Build script for the charsetter subproject.
plugins {
id 'java'
// ModiTect Gradle plugin, pinned to a release candidate.
id "org.moditect.gradleplugin" version "1.0.0-rc3"
}
// Shared build logic lives in script plugins under <rootDir>/deps.
apply from: "$rootDir/deps/java.gradle"
apply from: "$rootDir/deps/commons.gradle"
apply from: "$rootDir/deps/junit.gradle"
apply from: "$rootDir/deps/lombok.gradle"
configurations {
// Make everything in the 'dep' configuration visible at compile time only.
// NOTE(review): 'dep' is not declared in this script; presumably it is created
// by one of the scripts applied above - confirm.
compileOnly.extendsFrom(dep)
}

View file

@ -0,0 +1,8 @@
package io.xpipe.charsetter;
import java.nio.charset.Charset;
/**
 * Implemented by types that can report a {@link Charset}.
 */
public interface Charsettable {
/**
 * Returns the charset associated with this object.
 *
 * @return the charset; whether {@code null} is permitted is not specified here
 */
Charset getCharset();
}

View file

@ -0,0 +1,74 @@
package io.xpipe.charsetter;
import org.apache.commons.io.ByteOrderMark;
import org.apache.commons.io.input.BOMInputStream;
import org.apache.commons.lang3.function.FailableBiConsumer;
import org.apache.commons.lang3.function.FailableSupplier;
import java.io.BufferedInputStream;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.*;
/**
 * Static entry point for charset detection.
 *
 * <p>{@link #init(CharsetterContext)} must be called before any detection method is used;
 * otherwise an {@link IllegalStateException} is thrown. The candidate charsets are kept in a
 * static field that is accessed without synchronization.
 */
public class Charsetter {

    /** Maximum number of leading bytes sampled when a charset has to be inferred. */
    private static final int MAX_BYTES = 8192;

    /** Candidate charsets; {@code null} until {@link #init(CharsetterContext)} has run. */
    private static CharsetterUniverse universe;

    /**
     * Builds the set of candidate charsets from the given context. Calling this again replaces
     * the previously built set.
     *
     * @param ctx the context handed to {@link CharsetterUniverse#create(CharsetterContext)}
     */
    public static void init(CharsetterContext ctx) {
        universe = CharsetterUniverse.create(ctx);
    }

    /** Fails fast when a detection method is used before {@link #init(CharsetterContext)}. */
    private static void checkInit() {
        if (universe == null) {
            throw new IllegalStateException("Charsetter not initialized");
        }
    }

    /**
     * Opens a stream, determines its charset and optionally hands the stream to a consumer.
     *
     * <p>If the stream starts with a byte order mark recognised by {@link BOMInputStream}, the
     * charset named by that mark is used. Otherwise up to {@value #MAX_BYTES} leading bytes are
     * sampled and passed to {@link #inferCharset(byte[])}; the stream is then rewound so that the
     * consumer sees the content from the beginning (after any byte order mark).
     *
     * <p>The stream is closed when this method returns.
     *
     * @param in  supplies the stream to inspect
     * @param con receives the positioned stream together with the detected charset;
     *            may be {@code null} when only the charset is of interest
     * @return the detected charset
     * @throws IllegalStateException if {@link #init(CharsetterContext)} has not been called
     * @throws Exception             anything thrown by the supplier, the consumer or the stream
     */
    public static Charset read(
            FailableSupplier<InputStream, Exception> in,
            FailableBiConsumer<InputStream, Charset, Exception> con) throws Exception {
        checkInit();

        // BOMInputStream forwards mark()/reset() to the wrapped stream. Streams without mark
        // support (e.g. FileInputStream) would make reset() below throw, so buffer them first.
        // Closing the BOMInputStream also closes the buffer and the supplied stream.
        try (var is = in.get();
             var bin = new BOMInputStream(is.markSupported() ? is : new BufferedInputStream(is))) {
            Charset charset = null;

            ByteOrderMark bom = bin.getBOM();
            if (bom != null && bom.getCharsetName() != null) {
                charset = Charset.forName(bom.getCharsetName());
            }

            if (charset == null) {
                // Peek at the head of the stream, then rewind so the consumer starts at byte 0.
                bin.mark(MAX_BYTES);
                byte[] sample = bin.readNBytes(MAX_BYTES);
                bin.reset();
                charset = inferCharset(sample);
            }

            if (con != null) {
                con.accept(bin, charset);
            }
            return charset;
        }
    }

    /**
     * Returns the first candidate charset that decodes the given bytes without reporting a
     * malformed-input or unmappable-character error.
     *
     * @param content the bytes to probe; may be a truncated prefix of a larger input
     * @return the first matching candidate, or UTF-8 if no candidate matches
     * @throws IllegalStateException if {@link #init(CharsetterContext)} has not been called
     */
    public static Charset inferCharset(byte[] content) {
        checkInit();

        for (Charset candidate : universe.getCharsets()) {
            if (decodesWithoutError(candidate, content)) {
                return candidate;
            }
        }
        return StandardCharsets.UTF_8;
    }

    /** Strictly decodes {@code content} with {@code charset} and reports whether it succeeded. */
    private static boolean decodesWithoutError(Charset charset, byte[] content) {
        CharsetDecoder decoder = charset.newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);

        ByteBuffer input = ByteBuffer.wrap(content);
        CharBuffer output = CharBuffer.allocate(input.capacity() * 2);

        // endOfInput is false because the sample may end in the middle of a multi-byte
        // sequence; such a dangling tail yields an underflow result rather than an error.
        CoderResult result = decoder.decode(input, output, false);
        return !result.isError();
    }
}

View file

@ -0,0 +1,20 @@
package io.xpipe.charsetter;
import lombok.AllArgsConstructor;
import lombok.Value;
import java.util.List;
import java.util.Locale;
/**
 * Input describing the environment from which the candidate charsets are derived.
 *
 * <p>The constructor and the accessors (for example {@code getSystemCharsetName()} and
 * {@code getObservedCharsets()}, both called by {@code CharsetterUniverse.create}) are
 * generated by Lombok.
 */
@Value
@AllArgsConstructor
public class CharsetterContext {
/** Name of the system charset; resolved with {@code Charset.forName} by {@code CharsetterUniverse.create}. */
String systemCharsetName;
/** NOTE(review): not read by {@code CharsetterUniverse.create}; presumably reserved for its locale TODO - confirm. */
Locale systemLocale;
/** NOTE(review): not read by {@code CharsetterUniverse.create}; presumably reserved for its locale TODO - confirm. */
Locale appLocale;
/** Charset names, each resolved with {@code Charset.forName} by {@code CharsetterUniverse.create}. */
List<String> observedCharsets;
}

View file

@ -0,0 +1,32 @@
package io.xpipe.charsetter;
import lombok.AllArgsConstructor;
import lombok.Value;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
/**
 * The ordered list of charsets that are tried when a charset has to be inferred.
 */
@Value
@AllArgsConstructor
public class CharsetterUniverse {

    /** Candidate charsets, in the order in which they should be tried. */
    List<Charset> charsets;

    /**
     * Builds the candidate list from the given context: UTF-8 first, then the system charset,
     * then the observed charsets in their given order. A charset that is already present is not
     * added again, and the resulting list is unmodifiable.
     *
     * @param ctx the context to read charset names from; a {@code null} list of observed
     *            charsets is treated as empty
     * @return a universe containing each candidate charset exactly once
     * @throws IllegalArgumentException if a charset name is {@code null}, illegal or unsupported
     *                                  (as thrown by {@link Charset#forName(String)})
     */
    public static CharsetterUniverse create(CharsetterContext ctx) {
        List<Charset> cs = new ArrayList<>();
        cs.add(StandardCharsets.UTF_8);
        addIfAbsent(cs, Charset.forName(ctx.getSystemCharsetName()));

        // TODO: Locales

        List<String> observed = ctx.getObservedCharsets();
        if (observed != null) {
            for (String name : observed) {
                addIfAbsent(cs, Charset.forName(name));
            }
        }

        // Hand out an immutable snapshot so the candidate order cannot be altered via the getter.
        return new CharsetterUniverse(List.copyOf(cs));
    }

    /** Appends {@code candidate} unless an equal charset is already listed. */
    private static void addIfAbsent(List<Charset> target, Charset candidate) {
        if (!target.contains(candidate)) {
            target.add(candidate);
        }
    }
}

View file

@ -0,0 +1,7 @@
/**
 * Module descriptor for the charsetter library.
 */
module io.xpipe.charsetter {
    exports io.xpipe.charsetter;

    requires org.apache.commons.io;
    // Transitive because the exported Charsetter.read signature uses FailableSupplier and
    // FailableBiConsumer from this module; consumers must be able to read those types.
    requires transitive org.apache.commons.lang3;
    // Lombok is only needed at compile time.
    requires static lombok;
}