/*
 * Decompiled with CFR 0.152.
 */
package weka.core.converters;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.LinkedList;
import java.util.List;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.converters.AbstractLoader;
import weka.core.converters.BatchConverter;
import weka.core.converters.IncrementalConverter;

/**
 * Loads all text files found in the sub-directories of a directory and uses
 * the sub-directory names as class labels.
 *
 * <p>The generated dataset has a string attribute {@code text} holding the
 * file content, optionally a string attribute {@code filename}, and a nominal
 * class attribute {@code @@class@@} whose values are the names of the
 * sub-directories. Data can be fetched in one go via {@link #getDataSet()} or
 * one document at a time via {@link #getNextInstance(Instances)}.
 */
public class TextDirectoryLoader
        extends AbstractLoader
        implements BatchConverter, IncrementalConverter, OptionHandler {

    private static final long serialVersionUID = 2592118773712247647L;

    /** Cached header of the dataset; {@code null} until built by {@link #getStructure()}. */
    protected Instances m_structure = null;

    /** The directory to load from; defaults to the JVM's {@code user.dir}. */
    protected File m_sourceFile = new File(System.getProperty("user.dir"));

    /** Whether progress information is printed to stderr while batch loading. */
    protected boolean m_Debug = false;

    /** Whether an additional string attribute with the filename is generated. */
    protected boolean m_OutputFilename = false;

    /** Name of the character set used for reading; empty means platform default. */
    protected String m_charSet = "";

    /** Whether string values accumulate in the header during incremental reading. */
    protected boolean m_retainStringValues = false;

    /** Incremental mode: one queue of not-yet-read file names per class value. */
    protected List<LinkedList<String>> m_filesByClass;

    /** Incremental mode: index of the class directory the next document is taken from. */
    protected int m_lastClassDir = 0;

    /**
     * Creates a loader working on the current directory.
     */
    public TextDirectoryLoader() {
        // 0 is the retrieval mode also set by reset(); presumably the "none yet"
        // mode of AbstractLoader -- confirm against the base class.
        this.setRetrieval(0);
    }

    /**
     * Returns a description of this loader.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Loads all text files in a directory and uses the subdirectory names as class labels. The content of the text files will be stored in a String attribute, the filename can be stored as well.";
    }

    /**
     * Lists the command-line options understood by {@link #setOptions(String[])}.
     *
     * @return an enumeration of {@link Option} objects
     */
    @Override
    public Enumeration listOptions() {
        Vector<Option> result = new Vector<Option>();
        result.add(new Option("\tEnables debug output.\n\t(default: off)", "D", 0, "-D"));
        result.add(new Option("\tStores the filename in an additional attribute.\n\t(default: off)", "F", 0, "-F"));
        // -dir consumes one argument (the directory), see setOptions().
        result.add(new Option("\tThe directory to work on.\n\t(default: current directory)", "dir", 1, "-dir <directory>"));
        result.add(new Option("\tThe character set to use, e.g UTF-8.\n\t(default: use the default character set)", "charset", 1, "-charset <charset name>"));
        result.add(new Option("\tRetain all string attribute values when reading incrementally.", "R", 0, "-R"));
        return result.elements();
    }

    /**
     * Parses the options {@code -D}, {@code -F}, {@code -dir <directory>},
     * {@code -charset <name>} and {@code -R}.
     *
     * @param options the option array to parse
     * @throws Exception if option parsing fails or the directory is rejected
     *         by {@link #setDirectory(File)}
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        this.setDebug(Utils.getFlag("D", options));
        this.setOutputFilename(Utils.getFlag("F", options));
        this.setDirectory(new File(Utils.getOption("dir", options)));
        String charSet = Utils.getOption("charset", options);
        // An absent/empty -charset resets to "use the default character set".
        this.m_charSet = charSet.length() > 0 ? charSet : "";
        this.setRetainStringValues(Utils.getFlag('R', options));
    }

    /**
     * Returns the current configuration as an option array.
     *
     * @return the options, suitable for {@link #setOptions(String[])}
     */
    @Override
    public String[] getOptions() {
        Vector<String> options = new Vector<String>();
        if (this.getDebug()) {
            options.add("-D");
        }
        if (this.getOutputFilename()) {
            options.add("-F");
        }
        options.add("-dir");
        options.add(this.getDirectory().getAbsolutePath());
        if (this.m_charSet != null && this.m_charSet.length() > 0) {
            options.add("-charset");
            options.add(this.m_charSet);
        }
        if (this.getRetainStringValues()) {
            options.add("-R");
        }
        return options.toArray(new String[options.size()]);
    }

    /**
     * Returns the tip text for the character set property.
     *
     * @return the tip text
     */
    public String charSetTipText() {
        return "The character set to use when reading text files (eg UTF-8) - leave blank to use the default character set.";
    }

    /**
     * Sets the character set used for reading the text files.
     *
     * @param charSet the character set name; null or empty selects the default
     */
    public void setCharSet(String charSet) {
        this.m_charSet = charSet;
    }

    /**
     * Returns the character set used for reading the text files.
     *
     * @return the character set name, possibly empty
     */
    public String getCharSet() {
        return this.m_charSet;
    }

    /**
     * Turns debug output on or off.
     *
     * @param value true to print progress information to stderr
     */
    public void setDebug(boolean value) {
        this.m_Debug = value;
    }

    /**
     * Returns whether debug output is enabled.
     *
     * @return true if debug output is enabled
     */
    public boolean getDebug() {
        return this.m_Debug;
    }

    /**
     * Returns the tip text for the debug property.
     *
     * @return the tip text
     */
    public String debugTipText() {
        return "Whether to print additional debug information to the console.";
    }

    /**
     * Sets whether the filename is stored in an additional attribute. Because
     * this changes the dataset header, the loader is reset.
     *
     * @param value true to add the filename attribute
     */
    public void setOutputFilename(boolean value) {
        this.m_OutputFilename = value;
        this.reset();
    }

    /**
     * Returns whether the filename is stored in an additional attribute.
     *
     * @return true if the filename attribute is generated
     */
    public boolean getOutputFilename() {
        return this.m_OutputFilename;
    }

    /**
     * Returns the tip text for the output-filename property.
     *
     * @return the tip text
     */
    public String outputFilenameTipText() {
        return "Whether to store the filename in an additional attribute.";
    }

    /**
     * Returns a description of the kind of source this loader reads.
     *
     * @return the description
     */
    public String getFileDescription() {
        return "Directories";
    }

    /**
     * Returns the directory being loaded, as a fresh absolute {@link File}.
     *
     * @return the source directory
     */
    public File getDirectory() {
        return new File(this.m_sourceFile.getAbsolutePath());
    }

    /**
     * Sets the directory to load from; delegates to {@link #setSource(File)}.
     *
     * @param dir the directory
     * @throws IOException if {@code dir} is null or not an existing directory
     */
    public void setDirectory(File dir) throws IOException {
        this.setSource(dir);
    }

    /**
     * Sets whether string attribute values are retained when reading
     * incrementally.
     *
     * @param r true to retain all string values
     */
    public void setRetainStringValues(boolean r) {
        this.m_retainStringValues = r;
    }

    /**
     * Returns whether string attribute values are retained when reading
     * incrementally.
     *
     * @return true if all string values are retained
     */
    public boolean getRetainStringValues() {
        return this.m_retainStringValues;
    }

    /**
     * Returns the tip text for the retain-string-values property.
     *
     * @return the tip text
     */
    public String retainStringValuesTipText() {
        return "When reading incrementally, whether to retain all values for string attributes. When set to false only the values for string attributes in the currently read instance will be held in memory.";
    }

    /**
     * Discards the cached header and the incremental reading state.
     */
    @Override
    public void reset() {
        this.m_structure = null;
        this.m_filesByClass = null;
        this.m_lastClassDir = 0;
        this.setRetrieval(0);
    }

    /**
     * Resets the loader and uses the given directory as source.
     *
     * <p>Note that a non-null {@code dir} is stored before it is validated, so
     * the field is updated even when the existence check below fails.
     *
     * @param dir the directory to load from
     * @throws IOException if {@code dir} is null, does not exist or is not a
     *         directory
     */
    @Override
    public void setSource(File dir) throws IOException {
        this.reset();
        if (dir == null) {
            throw new IOException("Source directory object is null!");
        }
        this.m_sourceFile = dir;
        if (!dir.exists() || !dir.isDirectory()) {
            throw new IOException("Directory '" + dir + "' not found");
        }
    }

    /**
     * Builds (on first use) and returns the dataset header: attribute
     * {@code text}, optionally {@code filename}, and the nominal class
     * attribute {@code @@class@@} listing every sub-directory of the source
     * directory. The class attribute is set as class index.
     *
     * @return the cached header
     * @throws IOException if the source directory cannot be listed
     */
    @Override
    public Instances getStructure() throws IOException {
        if (this.getDirectory() == null) {
            throw new IOException("No directory/source has been specified");
        }
        if (this.m_structure == null) {
            String directoryPath = this.getDirectory().getAbsolutePath();
            File dir = new File(directoryPath);
            // File.list() yields null when the path is not a directory or an
            // I/O error occurs; report that instead of failing with an NPE.
            String[] entries = dir.list();
            if (entries == null) {
                throw new IOException("Directory '" + directoryPath + "' could not be listed");
            }
            ArrayList<String> classes = new ArrayList<String>();
            for (String entry : entries) {
                if (new File(directoryPath + File.separator + entry).isDirectory()) {
                    classes.add(entry);
                }
            }
            ArrayList<Attribute> atts = new ArrayList<Attribute>();
            // A null value list creates a string attribute.
            atts.add(new Attribute("text", (List<String>) null));
            if (this.m_OutputFilename) {
                atts.add(new Attribute("filename", (List<String>) null));
            }
            atts.add(new Attribute("@@class@@", classes));
            // Relation name: the directory path with '/', '\' and ':' replaced.
            String relName = directoryPath.replaceAll("/", "_");
            relName = relName.replaceAll("\\\\", "_").replaceAll(":", "_");
            this.m_structure = new Instances(relName, atts, 0);
            this.m_structure.setClassIndex(this.m_structure.numAttributes() - 1);
        }
        return this.m_structure;
    }

    /**
     * Reads every entry of every class sub-directory and returns the full
     * dataset. Entries that cannot be read (e.g. nested directories) are
     * reported on stderr and skipped.
     *
     * <p>The instances are appended to the object returned by
     * {@link #getStructure()}, i.e. the cached header itself.
     *
     * @return the dataset with one instance per successfully read file
     * @throws IOException if the header cannot be built
     */
    @Override
    public Instances getDataSet() throws IOException {
        if (this.getDirectory() == null) {
            throw new IOException("No directory/source has been specified");
        }
        String directoryPath = this.getDirectory().getAbsolutePath();
        Instances data = this.getStructure();
        Attribute classAtt = data.classAttribute();
        int fileCount = 0;
        for (int k = 0; k < classAtt.numValues(); ++k) {
            String subdirPath = classAtt.value(k);
            File subdir = new File(directoryPath + File.separator + subdirPath);
            String[] files = subdir.list();
            if (files == null) {
                // Sub-directory vanished or is unreadable; nothing to load.
                continue;
            }
            for (String file : files) {
                try {
                    ++fileCount;
                    if (this.getDebug()) {
                        System.err.println("processing " + fileCount + " : " + subdirPath + " : " + file);
                    }
                    double[] newInst = this.m_OutputFilename ? new double[3] : new double[2];
                    File txt = new File(directoryPath + File.separator + subdirPath + File.separator + file);
                    String content = this.readTextFile(txt);
                    newInst[0] = data.attribute(0).addStringValue(content);
                    if (this.m_OutputFilename) {
                        newInst[1] = data.attribute(1).addStringValue(subdirPath + File.separator + file);
                    }
                    newInst[data.classIndex()] = k;
                    data.add(new DenseInstance(1.0, newInst));
                }
                catch (Exception e) {
                    // Best effort: report the file and carry on with the next one.
                    System.err.println("failed to convert file: " + directoryPath + File.separator + subdirPath + File.separator + file);
                }
            }
        }
        return data;
    }

    /**
     * Reads the next document in incremental mode. Documents are taken from
     * the class directories in turn, skipping classes whose files are used up.
     *
     * @param structure the header the returned instance is attached to; its
     *        class attribute determines the class directories
     * @return the next instance, or {@code null} when no files are left
     * @throws IOException if the next file cannot be read
     */
    @Override
    public Instance getNextInstance(Instances structure) throws IOException {
        String directoryPath = this.getDirectory().getAbsolutePath();
        Attribute classAtt = structure.classAttribute();
        int numClasses = classAtt.numValues();

        if (this.m_filesByClass == null) {
            // First call: collect the regular files of every class directory.
            this.m_filesByClass = new ArrayList<LinkedList<String>>();
            for (int i = 0; i < numClasses; ++i) {
                String classPath = directoryPath + File.separator + classAtt.value(i);
                LinkedList<String> classDocs = new LinkedList<String>();
                String[] files = new File(classPath).list();
                if (files != null) {
                    for (String cd : files) {
                        if (new File(classPath + File.separator + cd).isFile()) {
                            classDocs.add(cd);
                        }
                    }
                }
                this.m_filesByClass.add(classDocs);
            }
        }

        // Starting at m_lastClassDir, look at each class at most once for a
        // non-empty queue. Bounding the search by numClasses guarantees
        // termination when every queue is empty (or there are no classes).
        LinkedList<String> classContents = null;
        for (int tried = 0; tried < numClasses; ++tried) {
            LinkedList<String> candidate = this.m_filesByClass.get(this.m_lastClassDir);
            if (!candidate.isEmpty()) {
                classContents = candidate;
                break;
            }
            this.m_lastClassDir = (this.m_lastClassDir + 1) % numClasses;
        }
        if (classContents == null) {
            return null;
        }

        String nextDoc = classContents.poll();
        File txt = new File(directoryPath + File.separator + classAtt.value(this.m_lastClassDir) + File.separator + nextDoc);
        String content = this.readTextFile(txt);

        double[] newInst = this.m_OutputFilename ? new double[3] : new double[2];
        if (this.getRetainStringValues()) {
            newInst[0] = structure.attribute(0).addStringValue(content);
        } else {
            // Only the current value is kept in the header, always at index 0.
            newInst[0] = 0.0;
            structure.attribute(0).setStringValue(content);
        }
        if (this.m_OutputFilename) {
            if (this.getRetainStringValues()) {
                newInst[1] = structure.attribute(1).addStringValue(txt.getAbsolutePath());
            } else {
                newInst[1] = 0.0;
                structure.attribute(1).setStringValue(txt.getAbsolutePath());
            }
        }
        newInst[structure.classIndex()] = this.m_lastClassDir;
        DenseInstance inst = new DenseInstance(1.0, newInst);
        inst.setDataset(structure);

        // Continue with the following class directory on the next call.
        this.m_lastClassDir = (this.m_lastClassDir + 1) % numClasses;
        return inst;
    }

    /**
     * Reads the complete content of a text file using the configured
     * character set (platform default if none is set). The underlying stream
     * is closed even when reading fails.
     *
     * @param txt the file to read
     * @return the file content
     * @throws IOException if the file cannot be opened or read, or the
     *         character set name is not supported
     */
    private String readTextFile(File txt) throws IOException {
        InputStream in = new FileInputStream(txt);
        try {
            InputStreamReader decoder = this.m_charSet == null || this.m_charSet.length() == 0
                    ? new InputStreamReader(in)
                    : new InputStreamReader(in, this.m_charSet);
            BufferedReader reader = new BufferedReader(decoder);
            StringBuilder text = new StringBuilder();
            char[] buffer = new char[8192];
            int n;
            while ((n = reader.read(buffer)) != -1) {
                text.append(buffer, 0, n);
            }
            return text.toString();
        }
        finally {
            // Closing the file stream releases the handle held by the readers.
            in.close();
        }
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 7784 $");
    }

    /**
     * Command-line entry point: with options, prints the header followed by
     * every instance read incrementally; without arguments, prints the usage.
     *
     * @param args the command-line options, see {@link #listOptions()}
     */
    public static void main(String[] args) {
        if (args.length > 0) {
            try {
                TextDirectoryLoader loader = new TextDirectoryLoader();
                loader.setOptions(args);
                Instances structure = loader.getStructure();
                System.out.println(structure);
                Instance temp;
                while ((temp = loader.getNextInstance(structure)) != null) {
                    System.out.println(temp);
                }
            }
            catch (Exception e) {
                e.printStackTrace();
            }
        } else {
            System.err.println("\nUsage:\n\tTextDirectoryLoader [options]\n\nOptions:\n");
            Enumeration enm = new TextDirectoryLoader().listOptions();
            while (enm.hasMoreElements()) {
                Option option = (Option) enm.nextElement();
                System.err.println(option.synopsis());
                System.err.println(option.description());
            }
            System.err.println();
        }
    }
}

