/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.seg.CRF;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.collection.trie.bintrie.BinTrie;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.other.CharTable;
import com.hankcs.hanlp.model.CRFSegmentModel;
import com.hankcs.hanlp.model.crf.CRFModel;
import com.hankcs.hanlp.model.crf.FeatureFunction;
import com.hankcs.hanlp.model.crf.Table;
import com.hankcs.hanlp.seg.CharacterBasedSegment;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.utility.CharacterHelper;
import com.hankcs.hanlp.utility.GlobalObjectPool;
import com.hankcs.hanlp.utility.Predefine;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;

/**
 * Character-based segmenter that groups the characters of a sentence into
 * atoms, lets a {@link CRFModel} tag each atom, and then joins the tagged atoms
 * into {@link Term}s.
 *
 * <p>The path-based constructor logs a warning stating that this class is
 * deprecated in favour of {@code CRFLexicalAnalyzer}.
 */
public class CRFSegment
extends CharacterBasedSegment {
    /** Model used to tag the atom table; assigned exactly once by the constructors. */
    private final CRFModel crfModel;

    /**
     * Creates a segmenter backed by an already loaded model.
     *
     * @param crfModel the model used by {@link #roughSegSentence(char[])}
     */
    public CRFSegment(CRFSegmentModel crfModel) {
        this.crfModel = crfModel;
    }

    /**
     * Creates a segmenter for the model stored at {@code modelPath}.
     *
     * <p>The model is looked up in {@link GlobalObjectPool} under the path first;
     * only when no entry exists is it loaded from the path and put into the pool.
     *
     * @param modelPath path of the model, also used as the pool key
     * @throws IllegalArgumentException if the model cannot be loaded
     */
    public CRFSegment(String modelPath) {
        Predefine.logger.warning("\u5df2\u5e9f\u5f03CRFSegment\uff0c\u8bf7\u4f7f\u7528\u529f\u80fd\u66f4\u4e30\u5bcc\u3001\u8bbe\u8ba1\u66f4\u4f18\u96c5\u7684CRFLexicalAnalyzer");
        CRFModel model = (CRFModel)GlobalObjectPool.get(modelPath);
        if (model == null) {
            model = CRFSegment.loadAndCacheModel(modelPath);
        }
        this.crfModel = model;
    }

    /**
     * Creates a segmenter using the path configured in
     * {@code HanLP.Config.CRFSegmentModelPath}.
     */
    public CRFSegment() {
        this(HanLP.Config.CRFSegmentModelPath);
    }

    /**
     * Loads the model at {@code modelPath}, logs the elapsed time and stores the
     * model in {@link GlobalObjectPool} under the same path.
     *
     * @param modelPath path of the model to load
     * @return the loaded model, never {@code null}
     * @throws IllegalArgumentException if {@code CRFModel.loadTxt} returns {@code null}
     */
    private static CRFModel loadAndCacheModel(String modelPath) {
        Predefine.logger.info("CRF\u5206\u8bcd\u6a21\u578b\u6b63\u5728\u52a0\u8f7d " + modelPath);
        long start = System.currentTimeMillis();
        CRFModel model = CRFModel.loadTxt(modelPath, new CRFSegmentModel(new BinTrie<FeatureFunction>()));
        if (model == null) {
            String error = "CRF\u5206\u8bcd\u6a21\u578b\u52a0\u8f7d " + modelPath + " \u5931\u8d25\uff0c\u8017\u65f6 " + (System.currentTimeMillis() - start) + " ms";
            Predefine.logger.severe(error);
            throw new IllegalArgumentException(error);
        }
        Predefine.logger.info("CRF\u5206\u8bcd\u6a21\u578b\u52a0\u8f7d " + modelPath + " \u6210\u529f\uff0c\u8017\u65f6 " + (System.currentTimeMillis() - start) + " ms");
        GlobalObjectPool.put(modelPath, model);
        return model;
    }

    /**
     * Segments one sentence into terms.
     *
     * <p>The sentence is passed through {@code CharTable.convert}, split into
     * atoms by {@link #atomSegmentToTable(char[])} and tagged by the model. A row
     * whose tag starts with {@code 'B'} opens a word that extends through the
     * next row whose tag starts with {@code 'E'}; every other row becomes a term
     * on its own. Term text is cut from the original {@code sentence} using
     * offsets accumulated from atom lengths.
     * NOTE(review): this assumes {@code CharTable.convert} preserves the array
     * length - confirm.
     *
     * <p>If a word opened by {@code 'B'} is never closed by {@code 'E'}, it ends
     * at the last atom of the sentence and takes its nature from that atom.
     * Previously this case read past the end of the table and threw
     * {@link ArrayIndexOutOfBoundsException}.
     *
     * @param sentence characters of the sentence
     * @return the terms in sentence order; an empty list for an empty sentence
     */
    @Override
    protected List<Term> roughSegSentence(char[] sentence) {
        if (sentence.length == 0) {
            return Collections.emptyList();
        }
        char[] sentenceConverted = CharTable.convert(sentence);
        Table table = new Table();
        table.v = CRFSegment.atomSegmentToTable(sentenceConverted);
        this.crfModel.tag(table);
        List<Term> termList = new LinkedList<Term>();
        if (HanLP.Config.DEBUG) {
            System.out.println("CRF\u6807\u6ce8\u7ed3\u679c");
            System.out.println(table);
        }
        String[][] rows = table.v;
        int offset = 0;
        int i = 0;
        while (i < rows.length) {
            if (rows[i][2].charAt(0) == 'B') {
                int begin = offset;
                // Consume atoms up to (not including) the closing 'E' row, or up
                // to the end of the table when the word is never closed.
                while (i < rows.length && rows[i][2].charAt(0) != 'E') {
                    offset += rows[i][1].length();
                    ++i;
                }
                if (i == rows.length) {
                    // Unterminated word: the last consumed atom is rows[i - 1].
                    termList.add(new Term(new String(sentence, begin, offset - begin), CRFSegment.toDefaultNature(rows[i - 1][0])));
                    break;
                }
                // Include the closing 'E' atom in the word.
                offset += rows[i][1].length();
                termList.add(new Term(new String(sentence, begin, offset - begin), CRFSegment.toDefaultNature(rows[i][0])));
            } else {
                int length = rows[i][1].length();
                termList.add(new Term(new String(sentence, offset, length), CRFSegment.toDefaultNature(rows[i][0])));
                offset += length;
            }
            ++i;
        }
        return termList;
    }

    /**
     * Maps the first column of an atom row to a default nature.
     *
     * @param compiledChar first column of a row built by {@link #atomSegmentToTable(char[])}
     * @return {@code Nature.m} for {@code "M"}, {@code Nature.nx} for {@code "W"},
     *         otherwise {@code null}
     */
    protected static Nature toDefaultNature(String compiledChar) {
        if (compiledChar.equals("M")) {
            return Nature.m;
        }
        if (compiledChar.equals("W")) {
            return Nature.nx;
        }
        return null;
    }

    /**
     * Splits a sentence into atoms.
     *
     * <p>An atom is either a number (an ASCII digit followed by any run of ASCII
     * digits, {@code '.'} and {@code '%'}), a run of characters accepted by
     * {@code CharacterHelper.isEnglishLetter}, or a single other character.
     *
     * @param sentence characters to split
     * @return the atoms in order; concatenated they reproduce {@code sentence}
     */
    public static List<String> atomSegment(char[] sentence) {
        List<String> atomList = new ArrayList<String>(sentence.length);
        int i = 0;
        while (i < sentence.length) {
            char c = sentence[i];
            int end;
            if (CRFSegment.isAsciiDigit(c)) {
                end = CRFSegment.endOfNumber(sentence, i + 1);
            } else if (CharacterHelper.isEnglishLetter(c)) {
                end = CRFSegment.endOfLetters(sentence, i + 1, false);
            } else {
                end = i + 1;
            }
            atomList.add(new String(sentence, i, end - i));
            i = end;
        }
        return atomList;
    }

    /**
     * Splits a sentence into atoms and lays them out as table rows.
     *
     * <p>Each row has three columns. Column 1 holds the atom text. Column 0 holds
     * {@code "M"} for a number atom, {@code "W"} for an atom made of English
     * letters and spaces, and the atom text itself for any other single
     * character. Column 2 is left {@code null} here. Unlike
     * {@link #atomSegment(char[])}, a space counts as part of a letter run.
     *
     * @param sentence characters to split
     * @return one row per atom, in order
     */
    public static String[][] atomSegmentToTable(char[] sentence) {
        // Upper bound: at most one atom per character; trimmed before returning.
        String[][] table = new String[sentence.length][3];
        int size = 0;
        int i = 0;
        while (i < sentence.length) {
            char c = sentence[i];
            String[] row = table[size++];
            int end;
            if (CRFSegment.isAsciiDigit(c)) {
                end = CRFSegment.endOfNumber(sentence, i + 1);
                row[0] = "M";
                row[1] = new String(sentence, i, end - i);
            } else if (CharacterHelper.isEnglishLetter(c) || c == ' ') {
                end = CRFSegment.endOfLetters(sentence, i + 1, true);
                row[0] = "W";
                row[1] = new String(sentence, i, end - i);
            } else {
                end = i + 1;
                String single = String.valueOf(c);
                row[1] = single;
                row[0] = single;
            }
            i = end;
        }
        return CRFSegment.resizeArray(table, size);
    }

    /** Returns whether {@code c} is one of the ASCII digits {@code '0'} to {@code '9'}. */
    private static boolean isAsciiDigit(char c) {
        return c >= '0' && c <= '9';
    }

    /**
     * Returns the exclusive end index of the run of ASCII digits, {@code '.'}
     * and {@code '%'} that starts at {@code from}.
     */
    private static int endOfNumber(char[] sentence, int from) {
        int end = from;
        while (end < sentence.length
                && (sentence[end] == '.' || sentence[end] == '%' || CRFSegment.isAsciiDigit(sentence[end]))) {
            ++end;
        }
        return end;
    }

    /**
     * Returns the exclusive end index of the run of English letters (and, when
     * {@code allowSpace} is set, spaces) that starts at {@code from}.
     */
    private static int endOfLetters(char[] sentence, int from, boolean allowSpace) {
        int end = from;
        while (end < sentence.length
                && (CharacterHelper.isEnglishLetter(sentence[end]) || (allowSpace && sentence[end] == ' '))) {
            ++end;
        }
        return end;
    }

    /**
     * Returns an array holding the first {@code size} rows of {@code array}.
     *
     * @param array source rows
     * @param size number of leading rows to keep
     * @return {@code array} itself when it already has {@code size} rows, otherwise
     *         a new outer array sharing the same row objects
     */
    private static String[][] resizeArray(String[][] array, int size) {
        if (array.length == size) {
            return array;
        }
        String[][] nArray = new String[size][];
        System.arraycopy(array, 0, nArray, 0, size);
        return nArray;
    }

    /**
     * Not supported by this segmenter.
     *
     * @param enable ignored
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Override
    public Segment enableNumberQuantifierRecognize(boolean enable) {
        throw new UnsupportedOperationException("\u6682\u4e0d\u652f\u6301");
    }
}

