/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.process;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.Document;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.process.ListProcessor;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Groups a flat list of tokens into a list of sentences.
 *
 * <p>Each token must be a {@link HasWord}, a {@link String} or a {@link CoreMap}; its text is
 * compared against the configured patterns:
 * <ul>
 *   <li>a token fully matching the <i>boundary token</i> regex ends the current sentence and is
 *       kept as its last token;</li>
 *   <li>a token contained in the <i>boundary followers</i> set that appears right after a
 *       sentence end (before any other token of the next sentence) is appended to the sentence
 *       that has just ended;</li>
 *   <li>a token fully matching any <i>boundary to discard</i> regex ends the current sentence and
 *       is dropped from the output;</li>
 *   <li>a {@link CoreMap} token whose {@code ForcedSentenceEndAnnotation} is {@code true} ends the
 *       current sentence and is kept;</li>
 *   <li>if a <i>region begin</i> pattern is set, tokens are ignored until a token matches it, and
 *       are ignored again after a token matches the <i>region end</i> pattern; the matching
 *       begin/end tokens themselves are not emitted.</li>
 * </ul>
 *
 * @param <IN> the token type
 */
public class WordToSentenceProcessor<IN>
implements ListProcessor<IN, List<IN>> {
    private static final boolean DEBUG = false;

    /** Regex a token must fully match to close the current sentence (the token is kept). */
    private final Pattern sentenceBoundaryTokenPattern;
    /** Tokens that are attached to the previous sentence when they directly follow its end. */
    private final Set<String> sentenceBoundaryFollowers;
    /** Regexes for tokens that close the current sentence and are dropped from the output. */
    private Set<Pattern> sentenceBoundaryToDiscard;
    /** If non-null, tokens are skipped until one fully matches this pattern. */
    private final Pattern sentenceRegionBeginPattern;
    /** If non-null, a token fully matching this pattern closes the sentence and the region. */
    private final Pattern sentenceRegionEndPattern;
    /** When true, {@link #process(List)} returns the whole input as a single sentence. */
    private boolean isOneSentence;

    /**
     * Replaces the set of "boundary to discard" patterns.
     *
     * <p>Every string is compiled as a regular expression. All strings are compiled before the
     * current patterns are replaced, so an invalid regex leaves the previous configuration intact.
     *
     * @param set regular expressions; {@code null} is treated as an empty set
     * @throws java.util.regex.PatternSyntaxException if any string is not a valid regex
     */
    public void setSentenceBoundaryToDiscard(Set<String> set) {
        this.sentenceBoundaryToDiscard = WordToSentenceProcessor.compilePatterns(set);
    }

    /** Returns whether {@link #process(List)} treats the whole input as one sentence. */
    public boolean isOneSentence() {
        return this.isOneSentence;
    }

    /** Sets whether {@link #process(List)} treats the whole input as one sentence. */
    public void setOneSentence(boolean oneSentence) {
        this.isOneSentence = oneSentence;
    }

    /**
     * Adds "boundary to discard" patterns that match HTML-style tags with the given names.
     *
     * <p>For each name, two case-insensitive patterns are added: one for the bare opening, closing
     * or self-closing tag, and one for the opening tag followed by attributes. The name is
     * inserted into the regex unquoted, so regex metacharacters in it keep their meaning.
     *
     * @param set tag names; {@code null} is treated as an empty set
     * @throws java.util.regex.PatternSyntaxException if a name yields an invalid regex; in that
     *         case no pattern from this call is added
     */
    public void addHtmlSentenceBoundaryToDiscard(Set<String> set) {
        if (this.sentenceBoundaryToDiscard == null) {
            this.sentenceBoundaryToDiscard = new HashSet<Pattern>();
        }
        if (set == null) {
            return;
        }
        // Compile everything first so a bad tag name cannot leave a partial update behind.
        Set<Pattern> additions = new HashSet<Pattern>();
        for (String tag : set) {
            additions.addAll(WordToSentenceProcessor.toHtmlPattern(tag));
        }
        this.sentenceBoundaryToDiscard.addAll(additions);
    }

    /** Compiles each string as a regex; a {@code null} set yields an empty result. */
    private static Set<Pattern> compilePatterns(Set<String> regexes) {
        Set<Pattern> compiled = new HashSet<Pattern>();
        if (regexes != null) {
            for (String regex : regexes) {
                compiled.add(Pattern.compile(regex));
            }
        }
        return compiled;
    }

    /** Builds the two case-insensitive patterns (bare tag, tag with attributes) for a tag name. */
    private static Set<Pattern> toHtmlPattern(String tag) {
        Set<Pattern> patterns = new HashSet<Pattern>();
        patterns.add(Pattern.compile("<\\s*/?\\s*" + tag + "\\s*/?\\s*>", Pattern.CASE_INSENSITIVE));
        patterns.add(Pattern.compile("<\\s*" + tag + "\\s+[^>]+>", Pattern.CASE_INSENSITIVE));
        return patterns;
    }

    /** Returns true if the whole word matches at least one "boundary to discard" pattern. */
    private boolean matchesSentenceBoundaryToDiscard(String word) {
        for (Pattern p : this.sentenceBoundaryToDiscard) {
            if (p.matcher(word).matches()) {
                return true;
            }
        }
        return false;
    }

    /** Returns the text of a token, which must be a HasWord, a String or a CoreMap. */
    private static String tokenText(Object token) {
        if (token instanceof HasWord) {
            return ((HasWord) token).word();
        }
        if (token instanceof String) {
            return (String) token;
        }
        if (token instanceof CoreMap) {
            return (String) ((CoreMap) token).get(CoreAnnotations.TextAnnotation.class);
        }
        String actual = token == null ? "null" : token.getClass().getName();
        throw new RuntimeException("Expected token to be a HasWord, a String or a CoreMap, but got: " + actual);
    }

    /** Returns true if the token is a CoreMap whose ForcedSentenceEndAnnotation is true. */
    private static boolean isForcedSentenceEnd(Object token) {
        if (!(token instanceof CoreMap)) {
            return false;
        }
        Boolean forced = (Boolean) ((CoreMap) token).get(CoreAnnotations.ForcedSentenceEndAnnotation.class);
        return forced != null && forced;
    }

    /**
     * Splits the tokens into sentences, or returns them as one sentence if
     * {@link #isOneSentence()} is true.
     *
     * @param words the tokens to group
     * @return the sentences, each a newly created list of the input tokens
     */
    @Override
    public List<List<IN>> process(List<? extends IN> words) {
        if (this.isOneSentence) {
            List<List<IN>> sentences = Generics.newArrayList();
            sentences.add(new ArrayList<IN>(words));
            return sentences;
        }
        return this.wordsToSentences(words);
    }

    /**
     * Splits the tokens into sentences according to the configured boundary tokens, boundary
     * followers, discard patterns, forced sentence ends and region patterns (see the class
     * description for the rules). Empty sentences are never emitted.
     *
     * @param words the tokens to group
     * @return the sentences in input order
     * @throws RuntimeException if a token is not a HasWord, a String or a CoreMap
     */
    public List<List<IN>> wordsToSentences(List<? extends IN> words) {
        List<List<IN>> sentences = Generics.newArrayList();
        List<IN> currentSentence = null;
        // The most recently closed sentence; boundary followers are appended to it.
        List<IN> lastSentence = null;
        boolean insideRegion = false;
        for (IN o : words) {
            String word = WordToSentenceProcessor.tokenText(o);
            boolean forcedEnd = WordToSentenceProcessor.isForcedSentenceEnd(o);
            if (currentSentence == null) {
                currentSentence = new ArrayList<IN>();
            }
            if (this.sentenceRegionBeginPattern != null && !insideRegion) {
                // Outside a region every token is dropped; a begin marker only opens the region.
                if (this.sentenceRegionBeginPattern.matcher(word).matches()) {
                    insideRegion = true;
                }
                continue;
            }
            if (this.sentenceBoundaryFollowers.contains(word) && lastSentence != null && currentSentence.isEmpty()) {
                // e.g. a closing quote or bracket right after the sentence-final punctuation.
                lastSentence.add(o);
                continue;
            }
            boolean newSent;
            if (this.matchesSentenceBoundaryToDiscard(word)) {
                newSent = true;
            } else if (this.sentenceRegionEndPattern != null && this.sentenceRegionEndPattern.matcher(word).matches()) {
                insideRegion = false;
                newSent = true;
            } else {
                // Ordinary token: always kept; it also closes the sentence if it is a boundary
                // token or carries a forced sentence end.
                boolean endsSentence = this.sentenceBoundaryTokenPattern.matcher(word).matches() || forcedEnd;
                currentSentence.add(o);
                newSent = endsSentence;
            }
            if (newSent && !currentSentence.isEmpty()) {
                sentences.add(currentSentence);
                lastSentence = currentSentence;
                currentSentence = null;
            }
        }
        // Flush a trailing sentence that was not closed by any boundary.
        if (currentSentence != null && !currentSentence.isEmpty()) {
            sentences.add(currentSentence);
        }
        return sentences;
    }

    /**
     * Applies {@link #process(List)} to a document and collects the sentences in a new document
     * obtained from {@code in.blankDocument()}.
     *
     * @param in the document whose tokens are to be grouped
     * @return the blank document filled with the resulting sentences
     */
    public <L, F> Document<L, F, List<IN>> processDocument(Document<L, F, IN> in) {
        Document<L, F, List<IN>> doc = in.blankDocument();
        doc.addAll(this.process(in));
        return doc;
    }

    /** Creates a processor whose boundary tokens are "." and runs of "!" / "?". */
    public WordToSentenceProcessor() {
        this("\\.|[!?]+");
    }

    /**
     * Creates a processor with the given boundary-token regex and the default boundary followers
     * (closing brackets, quotes and their -RRB-/-RSB-/-RCB- forms).
     *
     * @param boundaryTokens regex a token must fully match to end a sentence
     */
    public WordToSentenceProcessor(String boundaryTokens) {
        this(boundaryTokens, Generics.newHashSet(Arrays.asList(")", "]", "\"", "'", "''", "-RRB-", "-RSB-", "-RCB-")));
    }

    /**
     * Creates a processor with the given boundary-token regex and followers, discarding newline
     * tokens ("\n") as sentence boundaries.
     *
     * @param boundaryTokens regex a token must fully match to end a sentence
     * @param boundaryFollowers tokens attached to the preceding sentence when they directly follow its end
     */
    public WordToSentenceProcessor(String boundaryTokens, Set<String> boundaryFollowers) {
        this(boundaryTokens, boundaryFollowers, Collections.singleton("\n"));
    }

    /**
     * Creates a processor with explicit boundary tokens, followers and discard patterns, and no
     * region restriction.
     *
     * @param boundaryTokens regex a token must fully match to end a sentence
     * @param boundaryFollowers tokens attached to the preceding sentence when they directly follow its end
     * @param boundaryToDiscard regexes for tokens that end a sentence and are dropped
     */
    public WordToSentenceProcessor(String boundaryTokens, Set<String> boundaryFollowers, Set<String> boundaryToDiscard) {
        this(boundaryTokens, boundaryFollowers, boundaryToDiscard, null, null);
    }

    /**
     * Creates a processor that only emits tokens found between a token matching
     * {@code regionBeginPattern} and a token matching {@code regionEndPattern}. The boundary-token
     * regex is the empty string, and there are no followers or discard patterns.
     *
     * @param regionBeginPattern pattern of the token that opens a region
     * @param regionEndPattern pattern of the token that closes a region and the current sentence
     */
    public WordToSentenceProcessor(Pattern regionBeginPattern, Pattern regionEndPattern) {
        // Explicit type witnesses: without them the arguments infer Set<Object> before Java 8.
        this("", Collections.<String>emptySet(), Collections.<String>emptySet(), regionBeginPattern, regionEndPattern);
    }

    private WordToSentenceProcessor(String boundaryTokens, Set<String> boundaryFollowers, Set<String> boundaryToDiscard, Pattern regionBeginPattern, Pattern regionEndPattern) {
        this.sentenceBoundaryTokenPattern = Pattern.compile(boundaryTokens);
        this.sentenceBoundaryFollowers = boundaryFollowers == null ? Collections.<String>emptySet() : boundaryFollowers;
        // Compiled via a private helper rather than the overridable public setter, so a subclass
        // override never runs against a partially constructed instance.
        this.sentenceBoundaryToDiscard = WordToSentenceProcessor.compilePatterns(boundaryToDiscard);
        this.sentenceRegionBeginPattern = regionBeginPattern;
        this.sentenceRegionEndPattern = regionEndPattern;
    }
}

