/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.ling.LabeledWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.parser.lexparser.BaseUnknownWordModel;
import edu.stanford.nlp.parser.lexparser.IntTaggedWord;
import edu.stanford.nlp.parser.lexparser.Lexicon;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.Train;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.Numberer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Unknown-word model for French.
 *
 * <p>Words are mapped to coarse signature strings (for example {@code UNK-noun-plural})
 * built from suffix, digit, punctuation and capitalization cues. During training, counts
 * of rarely seen words are accumulated per signature and tag in the inherited
 * {@code unSeenCounter}; {@link #score} later turns those counts into a log score.
 *
 * <p>The signature granularity is selected by the inherited {@code unknownLevel}
 * (levels 1 and 2 are implemented here).
 *
 * <p>NOTE(review): the one-entry signature cache and the in-place mutation performed by
 * {@link #score} are unsynchronized, so an instance should presumably not be shared
 * between threads without external locking.
 */
public class FrenchUnknownWordModel
extends BaseUnknownWordModel {
    private static final long serialVersionUID = -776564693549194424L;

    /** Copied from the lexicon options in the constructor; not otherwise read in this class. */
    protected boolean smartMutation = false;

    /** One-entry cache for {@link #getSignatureIndex}: the signature index computed last. */
    protected transient int lastSignatureIndex = -1;
    /** One-entry cache for {@link #getSignatureIndex}: the sentence position used last. */
    protected transient int lastSentencePosition = -1;
    /** One-entry cache for {@link #getSignatureIndex}: the word index signaturized last. */
    protected transient int lastWordToSignaturize = -1;

    /** When positive, level-1 signatures that found no cue fall back to the last N characters of the word. */
    protected int unknownSuffixSize = 0;
    /** Copied from the lexicon options in the constructor; not otherwise read in this class. */
    protected int unknownPrefixSize = 0;

    /** Tag skipped when seeding the unseen counter for an empty training run. */
    private static final String BOUNDARY_TAG = ".$$.";
    /** Prefix shared by every signature produced by {@link #getSignature}. */
    private static final String BASE_LABEL = "UNK";
    /** Placeholder index used where the word or the tag of a count key is left unspecified. */
    private static final int ANY = -1;

    private transient Numberer tagNumberer;
    private transient Numberer wordNumberer;

    /**
     * Creates the model and copies the unknown-word settings out of the lexicon options.
     *
     * @param op  lexicon options supplying the signature level, smart-mutation flag and affix sizes
     * @param lex lexicon handed to the base class
     */
    public FrenchUnknownWordModel(Options.LexOptions op, Lexicon lex) {
        super(op, lex);
        this.unknownLevel = op.useUnknownWordSignatures;
        this.smartMutation = op.smartMutation;
        this.unknownSuffixSize = op.unknownSuffixSize;
        this.unknownPrefixSize = op.unknownPrefixSize;
    }

    /**
     * Resets the transient signature cache after deserialization.
     *
     * <p>Field initializers do not run for transient fields when an object is restored
     * through Java serialization, so without this hook the cache would start out as
     * (word 0, position 0) -> signature 0 and a lookup for exactly that pair would be
     * answered with a bogus hit. The serialized form itself is unchanged.
     * NOTE(review): assumes the model is persisted via Java serialization, as the
     * {@code serialVersionUID} suggests.
     */
    private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException {
        in.defaultReadObject();
        this.lastSignatureIndex = -1;
        this.lastSentencePosition = -1;
        this.lastWordToSignaturize = -1;
    }

    /**
     * Trains on the given trees with unit weight, reading part-of-speech tags from the
     * tagged yield of each tree.
     *
     * @param trees training trees
     */
    @Override
    public void train(Collection<Tree> trees) {
        this.train(trees, 1.0, false);
    }

    /**
     * Accumulates unseen-word statistics from the training trees.
     *
     * <p>Every token increments a local per-word counter. Once more than
     * {@code Train.fractionBeforeUnseenCounting} of the trees have been visited, a token
     * whose word has been counted fewer than two times so far is treated as a stand-in
     * for an unknown word, and four keys are incremented in {@code unSeenCounter}:
     * (signature, tag), (any, tag), (signature, any) and (any, any).
     *
     * <p>If nothing was collected, a warning is printed and every tag except the
     * boundary tag is seeded so that later scoring has counts to work with.
     *
     * @param trees            training trees
     * @param weight           amount added per observation
     * @param keepTagsAsLabels if true, events come from the labeled yield instead of the tagged yield
     */
    private void train(Collection<Tree> trees, double weight, boolean keepTagsAsLabels) {
        ClassicCounter<IntTaggedWord> seenCounter = new ClassicCounter<IntTaggedWord>();
        int treesVisited = 0;
        int indexToStartUnkCounting = (int)((double)trees.size() * Train.fractionBeforeUnseenCounting);
        for (Tree tree : trees) {
            // 1-based, so the comparison below skips the first indexToStartUnkCounting trees.
            ++treesVisited;
            List<IntTaggedWord> events = this.treeToEvents(tree, keepTagsAsLabels);
            int numEvents = events.size();
            for (int position = 0; position < numEvents; ++position) {
                IntTaggedWord event = events.get(position);
                IntTaggedWord wordOnly = new IntTaggedWord(event.word, ANY);
                seenCounter.incrementCount(wordOnly, weight);
                if (treesVisited > indexToStartUnkCounting && seenCounter.getCount(wordOnly) < 2.0) {
                    int signature = this.getSignatureIndex(event.word, position);
                    this.unSeenCounter.incrementCount(new IntTaggedWord(signature, event.tag), weight);
                    this.unSeenCounter.incrementCount(new IntTaggedWord(ANY, event.tag), weight);
                    this.unSeenCounter.incrementCount(new IntTaggedWord(signature, ANY), weight);
                    this.unSeenCounter.incrementCount(new IntTaggedWord(ANY, ANY), weight);
                }
            }
        }
        if (this.unSeenCounter.isEmpty()) {
            System.err.printf("%s: WARNING: Unseen word counter is empty!%n", this.getClass().getName());
            int numTags = this.tagNumberer().total();
            for (int tt = 0; tt < numTags; ++tt) {
                if (BOUNDARY_TAG.equals(this.tagNumberer().object(tt))) {
                    continue;
                }
                this.unSeenCounter.incrementCount(new IntTaggedWord(ANY, tt), weight);
                this.unSeenCounter.incrementCount(new IntTaggedWord(ANY, ANY), weight);
            }
        }
    }

    /**
     * Converts the leaves of a tree into numbered (word, tag) events.
     *
     * @param tree             tree to read
     * @param keepTagsAsLabels if true, use the labeled yield; otherwise the tagged yield
     * @return one event per yield element, in yield order
     */
    protected List<IntTaggedWord> treeToEvents(Tree tree, boolean keepTagsAsLabels) {
        if (keepTagsAsLabels) {
            return this.listOfLabeledWordsToEvents(tree.labeledYield());
        }
        return this.treeToEvents(tree);
    }

    /**
     * Converts the tagged yield of a tree into numbered (word, tag) events.
     *
     * @param tree tree to read
     * @return one event per tagged word, in yield order
     */
    protected List<IntTaggedWord> treeToEvents(Tree tree) {
        List<TaggedWord> taggedWords = tree.taggedYield();
        return this.listToEvents(taggedWords);
    }

    /**
     * Numbers each tagged word through the global word and tag numberers.
     *
     * @param taggedWords words with their tags
     * @return a new list holding one event per input element, in input order
     */
    protected List<IntTaggedWord> listToEvents(List<TaggedWord> taggedWords) {
        List<IntTaggedWord> events = new ArrayList<IntTaggedWord>();
        for (TaggedWord tw : taggedWords) {
            int wordIndex = this.wordNumberer().number(tw.word());
            int tagIndex = this.tagNumberer().number(tw.tag());
            events.add(new IntTaggedWord(wordIndex, tagIndex));
        }
        return events;
    }

    /**
     * Numbers each labeled word through the global word and tag numberers.
     *
     * @param taggedWords words with their labels
     * @return a new list holding one event per input element, in input order
     */
    protected List<IntTaggedWord> listOfLabeledWordsToEvents(List<LabeledWord> taggedWords) {
        List<IntTaggedWord> events = new ArrayList<IntTaggedWord>();
        for (LabeledWord lw : taggedWords) {
            int wordIndex = this.wordNumberer().number(lw.word());
            int tagIndex = this.tagNumberer().number(lw.tag());
            events.add(new IntTaggedWord(wordIndex, tagIndex));
        }
        return events;
    }

    /**
     * Scores a (word, tag) pair for a word handled as unknown.
     *
     * <p>With S the signature of the word, T the tag and U "any unknown word", the method
     * reads the counts c(T,S), c(S), c(U) and c(T) from {@code unSeenCounter} and returns
     * <pre>
     *   log( ((c(T,S) + smooth * c(T)/c(U)) / (c(S) + smooth)) * (1/total) / (c_Tseen/total) )
     * </pre>
     * When {@code unknownLevel} is 0 the signature counts are ignored (treated as zero).
     *
     * <p>The argument is used as a scratch lookup key: its {@code word} and {@code tag}
     * fields are overwritten during the call and restored before returning normally.
     *
     * @param iTW     word/tag pair to score
     * @param loc     position of the word in the sentence, forwarded to the signature lookup
     * @param c_Tseen count of the tag among seen words (supplied by the caller)
     * @param total   normalizer supplied by the caller
     * @param smooth  smoothing weight given to the tag-only estimate
     * @return the log score as a float
     */
    @Override
    public float score(IntTaggedWord iTW, int loc, double c_Tseen, double total, double smooth) {
        int word = iTW.word;
        short tag = iTW.tag;

        // The lookups below depend on this exact mutation order: each getCount call sees
        // the key state left behind by the assignments before it.
        iTW.word = this.getSignatureIndex(iTW.word, loc);
        double c_TS = this.unSeenCounter.getCount(iTW);   // (signature, tag)
        iTW.tag = (short) ANY;
        double c_S = this.unSeenCounter.getCount(iTW);    // (signature, any)
        iTW.word = ANY;
        double c_U = this.unSeenCounter.getCount(iTW);    // (any, any)
        iTW.tag = tag;
        double c_T = this.unSeenCounter.getCount(iTW);    // (any, tag)
        iTW.word = word;

        double p_T_U = c_T / c_U;
        if (this.unknownLevel == 0) {
            c_TS = 0.0;
            c_S = 0.0;
        }
        double pb_T_S = (c_TS + smooth * p_T_U) / (c_S + smooth);
        double p_T = c_Tseen / total;
        double p_W = 1.0 / total;
        double pb_W_T = Math.log(pb_T_S * p_W / p_T);
        return (float)pb_W_T;
    }

    /** Returns the global "tags" numberer, looking it up on first use. */
    private Numberer tagNumberer() {
        if (this.tagNumberer == null) {
            this.tagNumberer = Numberer.getGlobalNumberer("tags");
        }
        return this.tagNumberer;
    }

    /** Returns the global "words" numberer, looking it up on first use. */
    private Numberer wordNumberer() {
        if (this.wordNumberer == null) {
            this.wordNumberer = Numberer.getGlobalNumberer("words");
        }
        return this.wordNumberer;
    }

    /**
     * Returns the word-numberer index of the signature of the given word.
     *
     * <p>The most recent (word, position) -> signature result is cached, so repeated
     * lookups for the same pair skip the string work.
     *
     * @param wordIndex        index of the word in the global word numberer
     * @param sentencePosition position of the word in its sentence
     * @return index of the signature string in the global word numberer
     */
    @Override
    public int getSignatureIndex(int wordIndex, int sentencePosition) {
        if (wordIndex == this.lastWordToSignaturize && sentencePosition == this.lastSentencePosition) {
            return this.lastSignatureIndex;
        }
        String word = (String)this.wordNumberer().object(wordIndex);
        String signature = this.getSignature(word, sentencePosition);
        int signatureIndex = this.wordNumberer().number(signature);
        this.lastSignatureIndex = signatureIndex;
        this.lastSentencePosition = sentencePosition;
        this.lastWordToSignaturize = wordIndex;
        return signatureIndex;
    }

    /**
     * Builds the signature string for a word.
     *
     * <p>The result always starts with {@code UNK}; levels 1 and 2 append feature markers
     * such as {@code -noun}, {@code -plural} or {@code -upper}. For any other level a
     * diagnostic is printed to standard error and the bare {@code UNK} is returned.
     *
     * @param word surface form of the word
     * @param loc  position of the word in the sentence; capitalization is only considered
     *             for positions after the first
     * @return the signature string
     */
    @Override
    public String getSignature(String word, int loc) {
        StringBuilder sb = new StringBuilder(BASE_LABEL);
        switch (this.unknownLevel) {
            case 1: {
                this.appendLevelOneFeatures(sb, word, loc);
                break;
            }
            case 2: {
                FrenchUnknownWordModel.appendLevelTwoFeatures(sb, word, loc);
                // This break was missing, so every valid level-2 call fell through to the
                // default branch and logged a spurious "Invalid unknown word signature".
                break;
            }
            default: {
                System.err.printf("%s: Invalid unknown word signature! (%d)%n", this.getClass().getName(), this.unknownLevel);
            }
        }
        return sb.toString();
    }

    /**
     * Appends the level-1 features: the first matching part-of-speech suffix (noun, then
     * adjective, verb, adverb), plural, digit, punctuation and capitalization markers, and,
     * if no marker matched and {@code unknownSuffixSize} is positive, the literal word suffix.
     */
    private void appendLevelOneFeatures(StringBuilder sb, String word, int loc) {
        String posSuffix = FrenchUnknownWordSignatures.nounSuffix(word);
        if (posSuffix.isEmpty()) {
            posSuffix = FrenchUnknownWordSignatures.adjSuffix(word);
        }
        if (posSuffix.isEmpty()) {
            posSuffix = FrenchUnknownWordSignatures.verbSuffix(word);
        }
        if (posSuffix.isEmpty()) {
            posSuffix = FrenchUnknownWordSignatures.advSuffix(word);
        }
        sb.append(posSuffix);
        sb.append(FrenchUnknownWordSignatures.possiblePlural(word));

        // A word made only of digits gets the stronger "-isNum" marker instead of "-num".
        String hasDigit = FrenchUnknownWordSignatures.hasDigit(word);
        String isDigit = FrenchUnknownWordSignatures.isDigit(word);
        if (!hasDigit.isEmpty()) {
            sb.append(isDigit.isEmpty() ? hasDigit : isDigit);
        }

        sb.append(FrenchUnknownWordSignatures.hasPunc(word));
        String allCaps = FrenchUnknownWordSignatures.isAllCaps(word);
        sb.append(allCaps);
        if (loc > 0 && allCaps.isEmpty()) {
            sb.append(FrenchUnknownWordSignatures.isCapitalized(word));
        }

        // Nothing matched at all: fall back to the raw trailing characters of the word.
        if (this.unknownSuffixSize > 0 && BASE_LABEL.contentEquals(sb)) {
            int suffixLength = Math.min(word.length(), this.unknownSuffixSize);
            sb.append('-').append(word.substring(word.length() - suffixLength));
        }
    }

    /**
     * Appends the level-2 features: the first matching of adverb, verb, noun suffix, then
     * adjective, digit, plural, punctuation and (for non-initial words) capitalization markers.
     */
    private static void appendLevelTwoFeatures(StringBuilder sb, String word, int loc) {
        String posSuffix = FrenchUnknownWordSignatures.advSuffix(word);
        if (posSuffix.isEmpty()) {
            posSuffix = FrenchUnknownWordSignatures.verbSuffix(word);
        }
        if (posSuffix.isEmpty()) {
            posSuffix = FrenchUnknownWordSignatures.nounSuffix(word);
        }
        sb.append(posSuffix);
        sb.append(FrenchUnknownWordSignatures.adjSuffix(word));
        sb.append(FrenchUnknownWordSignatures.hasDigit(word));
        sb.append(FrenchUnknownWordSignatures.possiblePlural(word));

        // The test used to be inverted: it appended isPunc only when isPunc was empty (a
        // no-op), so "-ipunc" was never emitted and all-punctuation tokens got "-hpunc".
        String isPunc = FrenchUnknownWordSignatures.isPunc(word);
        if (!isPunc.isEmpty()) {
            sb.append(isPunc);
        } else {
            sb.append(FrenchUnknownWordSignatures.hasPunc(word));
        }

        if (loc > 0) {
            sb.append(FrenchUnknownWordSignatures.isCapitalized(word));
        }
    }

    /**
     * Regular-expression based feature detectors used to build signatures. Each detector
     * returns its marker string when the feature is present and the empty string otherwise.
     */
    private static class FrenchUnknownWordSignatures {
        private static final Pattern NOUN_SUFFIX = Pattern.compile("(ier|i\u00e8re|it\u00e9|ion|ison|isme|ysme|iste|esse|eur|euse|ence|eau|erie|ng|ette|age|ade|ance|ude|ogue|aphe|ate|duc|anthe|archie|coque|\u00e9r\u00e8se|ergie|ogie|lithe|m\u00e8tre|m\u00e9trie|odie|pathie|phie|phone|phore|onyme|th\u00e8que|scope|some|pole|\u00f4me|chromie|pie)s?$");
        private static final Pattern ADJ_SUFFIX = Pattern.compile("(iste|i\u00e8me|uple|issime|aire|esque|atoire|ale|al|able|ible|atif|ique|if|ive|eux|aise|ent|ois|oise|ante|el|elle|ente|oire|ain|aine)s?$");
        private static final Pattern HAS_DIGIT = Pattern.compile("\\d+");
        private static final Pattern IS_DIGIT = Pattern.compile("^\\d+$");
        private static final Pattern POSSIBLE_PLURAL = Pattern.compile("(s|ux)$");
        private static final Pattern VERB_SUFFIX = Pattern.compile("(ir|er|re|ez|ont|ent|ant|ais|ait|ra|era|eras|\u00e9|\u00e9s|\u00e9es|isse|it)$");
        private static final Pattern ADV_SUFFIX = Pattern.compile("(iment|ement|emment|amment)$");
        private static final Pattern HAS_PUNC = Pattern.compile("([!-/:-@\\u005B\\]^-`{-~\u00a1-\u00bf\u2010-\u2027\u2030-\u205e\u20a0-\u20b5])+");
        private static final Pattern IS_PUNC = Pattern.compile("([!-/:-@\\u005B\\]^-`{-~\u00a1-\u00bf\u2010-\u2027\u2030-\u205e\u20a0-\u20b5])+$");
        private static final Pattern ALL_CAPS = Pattern.compile("^[A-Z\\u00C0-\\u00DD]+$");

        private FrenchUnknownWordSignatures() {
        }

        /** Returns {@code marker} if the pattern occurs anywhere in {@code s}, else the empty string. */
        private static String markIfFound(Pattern pattern, String s, String marker) {
            return pattern.matcher(s).find() ? marker : "";
        }

        /** Returns {@code marker} if the pattern matches the whole of {@code s}, else the empty string. */
        private static String markIfWhole(Pattern pattern, String s, String marker) {
            return pattern.matcher(s).matches() ? marker : "";
        }

        /** {@code "-noun"} if the word ends in one of the listed noun suffixes. */
        public static String nounSuffix(String s) {
            return markIfFound(NOUN_SUFFIX, s, "-noun");
        }

        /** {@code "-adj"} if the word ends in one of the listed adjective suffixes. */
        public static String adjSuffix(String s) {
            return markIfFound(ADJ_SUFFIX, s, "-adj");
        }

        /** {@code "-num"} if the word contains at least one digit. */
        public static String hasDigit(String s) {
            return markIfFound(HAS_DIGIT, s, "-num");
        }

        /** {@code "-isNum"} if the word consists of digits only. */
        public static String isDigit(String s) {
            return markIfFound(IS_DIGIT, s, "-isNum");
        }

        /** {@code "-verb"} if the word ends in one of the listed verb endings. */
        public static String verbSuffix(String s) {
            return markIfFound(VERB_SUFFIX, s, "-verb");
        }

        /** {@code "-plural"} if the word ends in "s" or "ux". */
        public static String possiblePlural(String s) {
            return markIfFound(POSSIBLE_PLURAL, s, "-plural");
        }

        /** {@code "-adv"} if the word ends in one of the listed adverb suffixes. */
        public static String advSuffix(String s) {
            return markIfFound(ADV_SUFFIX, s, "-adv");
        }

        /** {@code "-hpunc"} if the word contains at least one punctuation character. */
        public static String hasPunc(String s) {
            return markIfFound(HAS_PUNC, s, "-hpunc");
        }

        /** {@code "-ipunc"} if the word consists of punctuation characters only. */
        public static String isPunc(String s) {
            return markIfWhole(IS_PUNC, s, "-ipunc");
        }

        /** {@code "-allcap"} if every character is in the upper-case ranges of the pattern. */
        public static String isAllCaps(String s) {
            return markIfWhole(ALL_CAPS, s, "-allcap");
        }

        /** {@code "-upper"} if the word is non-empty and its first character is upper case. */
        public static String isCapitalized(String s) {
            if (s.length() > 0 && Character.isUpperCase(s.charAt(0))) {
                return "-upper";
            }
            return "";
        }
    }
}

