package de.lmu.ifi.dbs.elki.datasource.parser;

import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.data.HierarchicalClassLabel;
import de.lmu.ifi.dbs.elki.data.LabelList;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta;
import de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource;
import de.lmu.ifi.dbs.elki.datasource.parser.AbstractParser;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.persistent.ByteBufferSerializer;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntListParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import gnu.trove.list.array.TDoubleArrayList;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.BitSet;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

/* loaded from: input_file:de/lmu/ifi/dbs/elki/datasource/parser/NumberVectorLabelParser.class */
/**
 * Streaming parser that converts each non-empty, non-comment text line into a
 * number vector plus an optional list of labels.
 *
 * <p>A line is split with the inherited {@code tokenize} method. A token
 * becomes a numeric attribute when its column is not a configured label index
 * and {@link Double#parseDouble(String)} accepts it; every other token becomes
 * a label. If the first line of the stream contains no numeric attribute at
 * all, it is remembered as the list of column names instead of being emitted
 * as an object.</p>
 *
 * <p>Events are delivered through {@link #nextEvent()}; the object belonging
 * to the most recent {@code NEXT_OBJECT} event is read with
 * {@link #data(int)}.</p>
 *
 * @param <V> vector type produced by this parser
 */
public class NumberVectorLabelParser<V extends NumberVector<V, ?>> extends AbstractStreamingParser {
    /** Class logger, exposed via {@link #getLogger()}. */
    private static final Logging logger = Logging.getLogger((Class<?>) NumberVectorLabelParser.class);

    /** Option: indices of the columns that are always treated as labels. */
    public static final OptionID LABEL_INDICES_ID = OptionID.getOrCreateOptionID("parser.labelIndices", "A comma separated list of the indices of labels (may be numeric), counting whitespace separated entries in a line starting with 0. The corresponding entries will be treated as a label.");

    /** Option: vector type to create for the numerical attributes. */
    public static final OptionID VECTOR_TYPE_ID = OptionID.getOrCreateOptionID("parser.vector-type", "The type of vectors to create for numerical attributes.");

    /** Marker: no vector has been seen yet, so the dimensionality is unknown. */
    public static final int DIMENSIONALITY_UNKNOWN = -1;

    /** Marker: vectors of differing dimensionality have been seen. */
    public static final int DIMENSIONALITY_VARIABLE = -2;

    /** Columns forced to be labels; may be {@code null} (no forced label columns). */
    protected BitSet labelIndices;

    /** Prototype vector used to create new vectors and type information. */
    protected V factory;

    /** Line reader; set by {@link #initStream(InputStream)}, {@code null} once the stream ended. */
    private BufferedReader reader;

    /** 1-based number of the line that will be read next. */
    protected int lineNumber;

    /** Dimensionality of the vectors seen so far, or one of the {@code DIMENSIONALITY_*} markers. */
    protected int dimensionality;

    /** Current meta data; {@code null} until the first vector was parsed. */
    protected BundleMeta meta;

    /** Column names taken from a header line, or {@code null} if there was none. */
    protected List<String> columnnames;

    /** Columns that were found (or configured) to contain labels. */
    protected BitSet labelcolumns;

    /** Vector parsed from the current line, {@code null} for a header line. */
    protected V curvec;

    /** Labels parsed from the current line, {@code null} if the line had none. */
    protected LabelList curlbl;

    /** Event to deliver on the next {@link #nextEvent()} call, if any. */
    BundleStreamSource.Event nextevent;

    /**
     * Parameterization helper that reads the label indices and the vector
     * factory and builds the parser from them.
     *
     * @param <V> vector type produced by the parser
     */
    /* loaded from: input_file:de/lmu/ifi/dbs/elki/datasource/parser/NumberVectorLabelParser$Parameterizer.class */
    public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParser.Parameterizer {
        /** Configured label columns; replaced by a fresh set in {@link #getLabelIndices}. */
        protected BitSet labelIndices = null;

        /** Configured vector prototype; only assigned when the parameter could be grabbed. */
        protected V factory;

        /**
         * Reads the inherited options, then the label indices and the vector factory.
         *
         * @param parameterization parameter source
         */
        /* JADX INFO: Access modifiers changed from: protected */
        @Override // de.lmu.ifi.dbs.elki.datasource.parser.AbstractParser.Parameterizer, de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer
        public void makeOptions(Parameterization parameterization) {
            super.makeOptions(parameterization);
            getLabelIndices(parameterization);
            getFactory(parameterization);
        }

        /**
         * Reads the {@link NumberVectorLabelParser#VECTOR_TYPE_ID} option and
         * instantiates the vector prototype from it.
         *
         * @param parameterization parameter source
         */
        protected void getFactory(Parameterization parameterization) {
            ObjectParameter factoryParam = new ObjectParameter(NumberVectorLabelParser.VECTOR_TYPE_ID, (Class<?>) NumberVector.class, (Class<?>) DoubleVector.class);
            if (parameterization.grab(factoryParam)) {
                // Unchecked: the parameter is restricted to NumberVector implementations.
                this.factory = (V) factoryParam.instantiateClass(parameterization);
            }
        }

        /**
         * Reads the {@link NumberVectorLabelParser#LABEL_INDICES_ID} option into
         * a bit set. The set is empty when the option could not be grabbed.
         *
         * @param parameterization parameter source
         */
        protected void getLabelIndices(Parameterization parameterization) {
            IntListParameter indicesParam = new IntListParameter(NumberVectorLabelParser.LABEL_INDICES_ID, true);
            this.labelIndices = new BitSet();
            if (parameterization.grab(indicesParam)) {
                for (Object index : indicesParam.getValue()) {
                    this.labelIndices.set(((Integer) index).intValue());
                }
            }
        }

        /**
         * Creates the parser from the values collected so far.
         *
         * @return new parser instance
         */
        /* JADX INFO: Access modifiers changed from: protected */
        @Override // de.lmu.ifi.dbs.elki.datasource.parser.AbstractParser.Parameterizer, de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer
        public NumberVectorLabelParser<V> makeInstance() {
            return new NumberVectorLabelParser<>(this.colSep, this.quoteChar, this.labelIndices, this.factory);
        }
    }

    /**
     * Creates a parser with the default column separator, the double quote as
     * quote character and no forced label columns.
     *
     * @param v vector prototype
     */
    public NumberVectorLabelParser(V v) {
        this(Pattern.compile(AbstractParser.DEFAULT_SEPARATOR), '\"', null, v);
    }

    /**
     * Creates a parser.
     *
     * @param pattern column separator pattern, passed to the superclass
     * @param c quote character, passed to the superclass
     * @param bitSet columns that are always labels; may be {@code null}
     * @param v vector prototype
     */
    public NumberVectorLabelParser(Pattern pattern, char c, BitSet bitSet, V v) {
        super(pattern, c);
        this.meta = null;
        this.columnnames = null;
        this.labelcolumns = null;
        this.curvec = null;
        this.curlbl = null;
        this.nextevent = null;
        this.labelIndices = bitSet;
        this.factory = v;
    }

    /**
     * Attaches the parser to a new input stream and resets the line counter,
     * the dimensionality, the column names and the detected label columns.
     * The stream is decoded with the platform default charset.
     *
     * @param inputStream stream to read lines from
     */
    @Override // de.lmu.ifi.dbs.elki.datasource.parser.StreamingParser
    public void initStream(InputStream inputStream) {
        this.reader = new BufferedReader(new InputStreamReader(inputStream));
        this.lineNumber = 1;
        this.dimensionality = DIMENSIONALITY_UNKNOWN;
        this.columnnames = null;
        this.labelcolumns = new BitSet();
    }

    /**
     * @return the current meta data, {@code null} before the first vector was parsed
     */
    @Override // de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource
    public BundleMeta getMeta() {
        return this.meta;
    }

    /**
     * Advances the stream to the next event.
     *
     * <p>Empty lines and lines starting with {@code #} are skipped, as is a
     * header line. Whenever the meta data has to be rebuilt, {@code
     * META_CHANGED} is returned first and the {@code NEXT_OBJECT} event for the
     * same line is delivered by the following call. At the end of the input the
     * reader is closed and {@code END_OF_STREAM} is returned.</p>
     *
     * @return the next stream event
     * @throws IllegalArgumentException if reading from the stream fails; the
     *         underlying {@link IOException} is attached as cause
     */
    @Override // de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource
    public BundleStreamSource.Event nextEvent() {
        // Deliver the event that was deferred by a preceding META_CHANGED.
        if (this.nextevent != null) {
            BundleStreamSource.Event pending = this.nextevent;
            this.nextevent = null;
            return pending;
        }
        try {
            String line;
            while ((line = this.reader.readLine()) != null) {
                final boolean skipped = line.isEmpty() || line.startsWith("#");
                if (!skipped) {
                    parseLineInternal(line);
                }
                // Count every consumed line, including those that yield an event.
                // It must happen after parsing, because header detection tests
                // for lineNumber == 1. Without it a later all-text row would be
                // mistaken for the header row.
                this.lineNumber++;
                if (skipped || this.curvec == null) {
                    continue;
                }
                if (this.dimensionality == DIMENSIONALITY_UNKNOWN) {
                    // First vector: fixes the dimensionality.
                    this.dimensionality = this.curvec.getDimensionality();
                    return announceMetaChange();
                }
                if (this.dimensionality > 0) {
                    if (this.dimensionality != this.curvec.getDimensionality()) {
                        this.dimensionality = DIMENSIONALITY_VARIABLE;
                        return announceMetaChange();
                    }
                } else if (this.curlbl != null && this.meta != null && this.meta.size() == 1) {
                    // Labels showed up although the meta data only has one column so far.
                    return announceMetaChange();
                }
                return BundleStreamSource.Event.NEXT_OBJECT;
            }
            this.reader.close();
            this.reader = null;
            return BundleStreamSource.Event.END_OF_STREAM;
        } catch (IOException e) {
            // NOTE(review): the decompiler substituted this constant for the
            // message suffix; presumably a plain "." - confirm.
            throw new IllegalArgumentException("Error while parsing line " + this.lineNumber + HierarchicalClassLabel.DEFAULT_SEPARATOR_STRING, e);
        }
    }

    /**
     * Rebuilds the meta data and schedules the object event of the current
     * line for the next {@link #nextEvent()} call.
     *
     * @return {@code META_CHANGED}
     */
    private BundleStreamSource.Event announceMetaChange() {
        buildMeta();
        this.nextevent = BundleStreamSource.Event.NEXT_OBJECT;
        return BundleStreamSource.Event.META_CHANGED;
    }

    /**
     * Rebuilds {@link #meta} from the current dimensionality: one column for
     * the vectors, plus a label list column if any label column is known.
     */
    protected void buildMeta() {
        final boolean withLabels = this.labelcolumns.cardinality() > 0;
        this.meta = new BundleMeta(withLabels ? 2 : 1);
        this.meta.add(getTypeInformation(this.dimensionality));
        if (withLabels) {
            this.meta.add(TypeUtil.LABELLIST);
        }
    }

    /**
     * Returns one component of the current object.
     *
     * @param i column number: 0 for the vector, 1 for the labels
     * @return the current vector or label list
     * @throws ArrayIndexOutOfBoundsException for any other column number
     */
    @Override // de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource
    public Object data(int i) {
        switch (i) {
            case 0:
                return this.curvec;
            case 1:
                return this.curlbl;
            default:
                throw new ArrayIndexOutOfBoundsException();
        }
    }

    /**
     * Parses a single line into {@link #curvec} and {@link #curlbl}.
     *
     * <p>Each token is either a numeric attribute or a label, never both. A
     * token is a label when its column is listed in {@link #labelIndices} or
     * when it cannot be parsed as a double; such columns are recorded in
     * {@link #labelcolumns}. If this is line 1 and no numeric attribute was
     * found, the tokens are stored as column names and both current values are
     * set to {@code null}.</p>
     *
     * @param str line to parse
     */
    protected void parseLineInternal(String str) {
        List<String> tokens = tokenize(str);
        TDoubleArrayList attributes = new TDoubleArrayList(tokens.size());
        LabelList labels = null;
        int column = 0;
        for (String token : tokens) {
            // Advance exactly once per token, whatever branch is taken below.
            final int index = column++;
            // labelIndices is null when the single-argument constructor was used.
            if (this.labelIndices == null || !this.labelIndices.get(index)) {
                try {
                    attributes.add(Double.parseDouble(token));
                    // Numeric attribute: must not be added to the labels as well.
                    continue;
                } catch (NumberFormatException e) {
                    // Not a number: fall through and keep the token as a label.
                }
            }
            // Record the column for both configured and detected labels, so that
            // buildMeta() and the column name mapping know about it.
            this.labelcolumns.set(index);
            if (labels == null) {
                labels = new LabelList(1);
            }
            labels.add(token);
        }
        if (this.lineNumber == 1 && attributes.size() == 0) {
            // Header line: keep the names, but forget the label columns it produced.
            this.columnnames = labels;
            this.labelcolumns.clear();
            this.curvec = null;
            this.curlbl = null;
            return;
        }
        this.curvec = createDBObject(attributes, ArrayLikeUtil.TDOUBLELISTADAPTER);
        this.curlbl = labels;
    }

    /**
     * Creates a vector from an array-like container by delegating to the
     * vector prototype.
     *
     * @param a container holding the attribute values
     * @param numberArrayAdapter adapter used to read {@code a}
     * @param <A> container type
     * @return the new vector
     */
    protected <A> V createDBObject(A a, NumberArrayAdapter<?, A> numberArrayAdapter) {
        return (V) this.factory.newNumberVector(a, numberArrayAdapter);
    }

    /**
     * Builds the type information for the vector column.
     *
     * <p>For a positive dimensionality a vector field type is produced. Column
     * names are attached when a header was seen and the number of its non-label
     * columns equals the dimensionality. For
     * {@link #DIMENSIONALITY_VARIABLE} a plain type without dimensionality is
     * produced.</p>
     *
     * @param i dimensionality, or {@link #DIMENSIONALITY_VARIABLE}
     * @return type information for the vectors
     * @throws AbortException for any other non-positive dimensionality
     */
    SimpleTypeInformation<V> getTypeInformation(int i) {
        Class<?> cls = this.factory.getClass();
        if (i > 0) {
            String[] names = null;
            if (this.columnnames != null && this.columnnames.size() - this.labelcolumns.cardinality() == i) {
                names = new String[i];
                int target = 0;
                for (int col = 0; col < this.columnnames.size(); col++) {
                    if (!this.labelcolumns.get(col)) {
                        names[target++] = this.columnnames.get(col);
                    }
                }
            }
            NumberVector prototype = this.factory.newNumberVector(new double[i]);
            return prototype instanceof ByteBufferSerializer ? new VectorFieldTypeInformation(cls, (ByteBufferSerializer) prototype, i, names, prototype) : new VectorFieldTypeInformation((Class<? super NumberVector>) cls, i, names, prototype);
        }
        if (i != DIMENSIONALITY_VARIABLE) {
            throw new AbortException("No vectors were read from the input file - cannot determine vector data type.");
        }
        NumberVector emptyPrototype = this.factory.newNumberVector(new double[0]);
        return emptyPrototype instanceof ByteBufferSerializer ? new SimpleTypeInformation<>(cls, (ByteBufferSerializer) emptyPrototype) : new SimpleTypeInformation<>(cls);
    }

    /**
     * @return the logger of this class
     */
    @Override // de.lmu.ifi.dbs.elki.datasource.parser.AbstractParser
    protected Logging getLogger() {
        return logger;
    }
}
