package de.lmu.ifi.dbs.elki.datasource.parser;

import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.StringLengthConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.StringParameter;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.batik.util.XMLConstants;

/* loaded from: input_file:de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.class */
/**
 * Base class for line-oriented parsers: holds the column separator pattern and
 * the quotation character, and offers {@link #tokenize(String)} to split a
 * line into column tokens while honoring quoted tokens.
 */
public abstract class AbstractParser {
    /**
     * Default column separator: a run of whitespace, or a single comma or
     * semicolon optionally surrounded by whitespace.
     */
    public static final String DEFAULT_SEPARATOR = "(\\s+|\\s*[,;]\\s*)";

    /** Default quotation character (double quote). */
    public static final char QUOTE_CHAR = '\"';

    /**
     * Regular expression for a number: optional sign, optional mantissa with
     * an optional decimal point, optional exponent. Every part is optional,
     * so the pattern also matches the empty string, and the exponent accepts
     * only an optional {@code -} sign (not {@code +}).
     */
    public static final String NUMBER_PATTERN = "[+-]?(?:\\d+\\.?|\\d*\\.\\d+)?(?:[eE][-]?\\d+)?";

    /** Option ID for the column separator pattern. */
    public static final OptionID COLUMN_SEPARATOR_ID = OptionID.getOrCreateOptionID("parser.colsep", "Column separator pattern. The default assumes whitespace separated data.");

    /** Option ID for the quotation character. */
    public static final OptionID QUOTE_ID = OptionID.getOrCreateOptionID("parser.quote", "Quotation character. The default is to use a double quote.");

    /**
     * Comment marker. Not used inside this class; presumably subclasses skip
     * lines starting with it.
     */
    public static final String COMMENT = "#";

    /**
     * Separator string for concatenating attributes. Not used inside this
     * class; presumably used by subclasses when joining label columns.
     */
    public static final String ATTRIBUTE_CONCATENATION = " ";

    /** Compiled column separator; set once in the constructor. */
    private final Pattern colSep;

    /** Character that opens and closes a quoted token. */
    protected char quoteChar;

    /**
     * Parameterization helper that reads the column separator and the quote
     * character from the given parameterization.
     */
    public abstract static class Parameterizer extends AbstractParameterizer {
        /** Column separator; stays {@code null} unless the option could be grabbed. */
        protected Pattern colSep = null;

        /** Quote character; keeps the default unless the option could be grabbed. */
        protected char quoteChar = AbstractParser.QUOTE_CHAR;

        /**
         * Registers the column separator and quote character options and
         * stores their values when the parameterization provides them.
         *
         * @param parameterization source of the option values
         */
        @Override
        public void makeOptions(Parameterization parameterization) {
            super.makeOptions(parameterization);
            PatternParameter colSepParam = new PatternParameter(AbstractParser.COLUMN_SEPARATOR_ID, AbstractParser.DEFAULT_SEPARATOR);
            if (parameterization.grab(colSepParam)) {
                this.colSep = colSepParam.getValue();
            }
            // The quote option must be exactly one character long; its default
            // is derived from QUOTE_CHAR so both always agree.
            StringParameter quoteParam = new StringParameter(AbstractParser.QUOTE_ID, new StringLengthConstraint(1, 1), String.valueOf(AbstractParser.QUOTE_CHAR));
            if (parameterization.grab(quoteParam)) {
                this.quoteChar = quoteParam.getValue().charAt(0);
            }
        }

        @Override
        public abstract AbstractParser makeInstance();
    }

    /**
     * Constructor.
     *
     * @param pattern column separator pattern; {@link #tokenize(String)}
     *        dereferences it without a null check
     * @param c quotation character
     */
    public AbstractParser(Pattern pattern, char c) {
        this.colSep = pattern;
        this.quoteChar = c;
    }

    /**
     * Splits a line into tokens at matches of the column separator.
     * <p>
     * A token that starts with the quote character extends up to the next
     * quote character that is directly followed by a separator (or by the end
     * of the line); separators inside are part of the token and the enclosing
     * quote characters are removed. Empty tokens are dropped, including an
     * empty pair of quotes. If the last token opens a quote that is not closed
     * at the end of the line, a warning is logged and the remainder is
     * returned as is, including the opening quote.
     * <p>
     * For an empty input string the result is a list holding that single
     * empty string.
     *
     * @param str line to split, must not be {@code null}
     * @return list of tokens in order of appearance
     */
    public List<String> tokenize(String str) {
        List<String> tokens = new ArrayList<String>();
        Matcher matcher = this.colSep.matcher(str);
        final int length = str.length();
        // Start offset of the token currently being scanned.
        int tokenStart = 0;
        // Whether the token at tokenStart begins with the quote character.
        boolean inQuote = isQuoteAt(str, tokenStart);
        while (matcher.find()) {
            final int sepStart = matcher.start();
            if (inQuote && sepStart > 0) {
                // Inside a quoted token a separator only ends the token when
                // it directly follows a closing quote that is distinct from
                // the opening quote; otherwise it belongs to the token.
                if (sepStart > tokenStart + 1 && str.charAt(sepStart - 1) == this.quoteChar) {
                    if (tokenStart + 1 < sepStart - 1) {
                        tokens.add(str.substring(tokenStart + 1, sepStart - 1));
                    }
                    tokenStart = matcher.end();
                    inQuote = isQuoteAt(str, tokenStart);
                }
                continue;
            }
            // Regular path: emit the non-empty text before the separator.
            if (tokenStart < sepStart) {
                tokens.add(str.substring(tokenStart, sepStart));
            }
            tokenStart = matcher.end();
            inQuote = isQuoteAt(str, tokenStart);
        }
        // No separator consumed and no quoting involved: return the input
        // unchanged (this keeps the single empty token for empty input).
        // Quoted input deliberately falls through, so that a lone quoted
        // token is unquoted exactly like a quoted token in the last column.
        if (tokenStart == 0 && !inQuote) {
            tokens.add(str);
            return tokens;
        }
        // Tail after the last consumed separator. inQuote implies
        // tokenStart < length, so the line is non-empty here.
        if (inQuote) {
            if (str.charAt(length - 1) != this.quoteChar) {
                getLogger().warning("Invalid quoted line in input.");
                if (tokenStart < length) {
                    tokens.add(str.substring(tokenStart, length));
                }
            } else if (tokenStart + 1 < length - 1) {
                tokens.add(str.substring(tokenStart + 1, length - 1));
            }
        } else if (tokenStart < length) {
            tokens.add(str.substring(tokenStart, length));
        }
        return tokens;
    }

    /**
     * Tests whether the quote character is at the given position.
     *
     * @param str string to inspect
     * @param pos position to test; positions at or past the end yield {@code false}
     * @return {@code true} if {@code str} has the quote character at {@code pos}
     */
    private boolean isQuoteAt(String str, int pos) {
        return pos < str.length() && str.charAt(pos) == this.quoteChar;
    }

    /**
     * Returns the logger used by {@link #tokenize(String)} for warnings.
     *
     * @return logger of the concrete parser
     */
    protected abstract Logging getLogger();

    /**
     * Returns the fully qualified name of the concrete class.
     *
     * @return class name
     */
    @Override
    public String toString() {
        return getClass().getName();
    }
}
