package org.simantics.scl.compiler.completions.parsing;

import java.util.ArrayList;
import java.util.List;

/**
 * Splits module source text into {@link ModuleSegment}s without requiring the
 * text to be syntactically valid.
 *
 * <p>The text is scanned once with a small state machine that recognizes
 * short strings ({@code "..."}), long strings ({@code """..."""}), character
 * literals, C style comments and line comments. A new segment starts at every
 * line whose first character is neither a space nor a line break, provided
 * that the scanner is not inside a long string or a C style comment. While
 * scanning, the net number of opened brackets ({@code ( [ {}) is counted for
 * each segment; afterwards consecutive segments are merged so that every
 * resulting segment is balanced.
 *
 * <p>Segments in which the scanner had to recover from malformed input
 * (unclosed literal, unclosed comment, unmatched bracket) are flagged with
 * {@code hasErrors}.
 */
public class RobustModuleSplitter {
    /** States of the scanner used by {@link #split()}. */
    private enum State {
        /** At the beginning of a line, outside literals and comments. */
        NORMAL_START_OF_LINE,
        /** Ordinary code. */
        NORMAL,
        /** One {@code "} read in ordinary code. */
        NORMAL_1QUOTE,
        /** Two consecutive {@code "} read in ordinary code. */
        NORMAL_2QUOTE,
        /** Inside a {@code "..."} string. */
        SHORT_STRING,
        /** Directly after a backslash inside a short string. */
        SHORT_STRING_BACKSLASH,
        /** Inside a {@code """..."""} string. */
        LONG_STRING,
        /** One {@code "} read inside a long string. */
        LONG_STRING_1QUOTE,
        /** Two consecutive {@code "} read inside a long string. */
        LONG_STRING_2QUOTE,
        /** Inside a character literal. */
        CHAR_LITERAL,
        /** Directly after a backslash inside a character literal. */
        CHAR_LITERAL_BACKSLASH,
        /** One {@code /} read in ordinary code. */
        NORMAL_1SLASH,
        /** Inside a C style comment. */
        C_COMMENT,
        /** One or more {@code *} read inside a C style comment. */
        C_COMMENT_STAR,
        /** Inside a line comment. */
        CPP_COMMENT
    }
    
    private final String sourceText;
    private ArrayList<ModuleSegment> segments = new ArrayList<ModuleSegment>();
    
    private RobustModuleSplitter(String sourceText) {
        this.sourceText = sourceText;
    }

    /**
     * Tells whether {@code c} ends a line. The scanner maps {@code '\r'} to
     * {@code '\n'} and uses the character 0 as the end of input marker.
     */
    private static boolean isLineEnd(char c) {
        return c == '\n' || c == 0;
    }
    
    /**
     * Scans {@link #sourceText} and fills {@link #segments} with one segment
     * per group of lines that starts at an unindented line. Each segment
     * records the net bracket balance of its text and whether the scanner
     * had to recover from an error inside it.
     */
    private void split() {
        State state = State.NORMAL;
        // begin          = start offset of the segment being scanned
        // curEntityBegin = offset just after the opener of the long string
        //                  or C comment being scanned; used for rewinding
        //                  when that entity turns out to be unclosed
        int begin = 0, pos = 0, curEntityBegin = 0, parenthesesBalance = 0;
        boolean hasErrors = false;
        int length = sourceText.length();
        loop: while(true) {
            // Character 0 marks the end of the input
            char c = pos == length ? 0 : sourceText.charAt(pos++);
            if(c == '\r')
                c = '\n';
            switch(state) {
            case NORMAL_START_OF_LINE:
                if(c == '\n') // Don't care about empty lines
                    break;
                if(c != ' ') {
                    // An unindented line (or the end of input) closes the
                    // current segment. The character just read belongs to
                    // the next segment unless it is the end marker.
                    int end = c == 0 ? pos : pos-1;
                    segments.add(new ModuleSegment(begin, end, parenthesesBalance, hasErrors));
                    parenthesesBalance = 0;
                    hasErrors = false;
                    begin = end;
                }
                state = State.NORMAL;
                // Intentional fall through: c is handled as ordinary code
            case NORMAL:
                if(c == '"')
                    state = State.NORMAL_1QUOTE;
                else if(c == '/')
                    state = State.NORMAL_1SLASH;
                else if(c == '\'')
                    state = State.CHAR_LITERAL;
                else if(c == '(' || c == '[' || c == '{') 
                    ++parenthesesBalance;
                else if(c == ')' || c == ']' || c == '}') 
                    --parenthesesBalance;
                else if(c == '\n')
                    state = State.NORMAL_START_OF_LINE;
                else if(c == 0)
                    break loop;
                break;
            case NORMAL_1QUOTE:
                if(c == '"')
                    state = State.NORMAL_2QUOTE;
                else if(c == '\\')
                    state = State.SHORT_STRING_BACKSLASH;
                else if(isLineEnd(c)) {
                    // Unclosed string: the line ends right after the opening
                    // quote. The next line must not be read as string content.
                    hasErrors = true;
                    if(c == 0)
                        break loop;
                    state = State.NORMAL_START_OF_LINE;
                }
                else
                    state = State.SHORT_STRING;
                break;
            case NORMAL_2QUOTE:
                if(c == '"') {
                    state = State.LONG_STRING;
                    curEntityBegin = pos;
                }
                else {
                    // It was an empty short string; reprocess c as ordinary code
                    state = State.NORMAL;
                    if(c != 0)
                        --pos;
                }
                break;
            case SHORT_STRING:
                if(c == '\\')
                    state = State.SHORT_STRING_BACKSLASH;
                else if(c == '"')
                    state = State.NORMAL;
                else if(isLineEnd(c)) {
                    // Unclosed string. Errors only accumulate: an earlier
                    // error in the segment is never cleared here.
                    state = c == '\n' ? State.NORMAL_START_OF_LINE : State.NORMAL;
                    hasErrors = true;
                }
                break;
            case SHORT_STRING_BACKSLASH:
                if(isLineEnd(c)) {
                    // Unclosed string
                    state = State.NORMAL_START_OF_LINE;
                    hasErrors = true;
                }
                else
                    state = State.SHORT_STRING;
                break;
            case LONG_STRING:
                if(c == '"')
                    state = State.LONG_STRING_1QUOTE;
                else if(c == 0) {
                    // Unclosed long string: rescan its content as ordinary
                    // code, in the same way as for an unclosed C comment
                    pos = curEntityBegin;
                    state = State.NORMAL;
                    hasErrors = true;
                }
                break;
            case LONG_STRING_1QUOTE:
                if(c == '"')
                    state = State.LONG_STRING_2QUOTE;
                else
                    state = State.LONG_STRING;
                break;
            case LONG_STRING_2QUOTE:
                if(c == '"')
                    state = State.NORMAL;
                else
                    state = State.LONG_STRING;
                break;
            case CHAR_LITERAL:
                if(c == '\'')
                    state = State.NORMAL;
                else if(isLineEnd(c)) {
                    // Unclosed char literal; errors only accumulate
                    state = c == '\n' ? State.NORMAL_START_OF_LINE : State.NORMAL;
                    hasErrors = true;
                }
                else if(c == '\\')
                    state = State.CHAR_LITERAL_BACKSLASH;
                break;
            case CHAR_LITERAL_BACKSLASH:
                if(isLineEnd(c)) {
                    // Unclosed char literal
                    state = State.NORMAL_START_OF_LINE;
                    hasErrors = true;
                }
                else
                    state = State.CHAR_LITERAL;
                break;
            case NORMAL_1SLASH:
                if(c == '/')
                    state = State.CPP_COMMENT;
                else if(c == '*') {
                    state = State.C_COMMENT;
                    curEntityBegin = pos;
                }
                else {
                    // A lone slash; reprocess c as ordinary code
                    state = State.NORMAL;
                    if(c != 0)
                        --pos;
                }
                break;
            case C_COMMENT:
                if(c == '*')
                    state = State.C_COMMENT_STAR;
                else if(c == 0) {
                    // Unclosed C comment: rescan its content as ordinary code
                    pos = curEntityBegin;
                    state = State.NORMAL;
                    hasErrors = true;
                }
                break;
            case C_COMMENT_STAR:
                if(c == '/')
                    state = State.NORMAL;
                else if(c != '*')
                    // Stay in this state on '*' so that "**/" closes the comment
                    state = State.C_COMMENT;
                break;
            case CPP_COMMENT:
                if(isLineEnd(c))
                    state = State.NORMAL_START_OF_LINE;
                break;
            }
        }
        if(begin != length)
            segments.add(new ModuleSegment(begin, length, parenthesesBalance, hasErrors));
    }
    
    /**
     * Merges consecutive segments so that no resulting segment has a
     * nonzero bracket balance.
     *
     * <p>First, surplus closing brackets are matched against surplus opening
     * brackets of earlier segments. Brackets that find no partner are removed
     * from the balance of their segment and the segment is flagged as
     * erroneous. Second, each run of segments that begins with a positive
     * balance and ends where the running balance returns to zero is replaced
     * by one segment covering the whole run.
     */
    private void combineByParenthesesBalance() {
        // Holds one entry per still unmatched opening bracket; created lazily
        // because most inputs have only balanced segments
        ArrayList<ModuleSegment> segmentStack = null; 
        for(ModuleSegment segment : segments)
            if(segment.parenthesesBalance > 0) {
                if(segmentStack == null)
                    segmentStack = new ArrayList<ModuleSegment>();
                for(int i=0;i<segment.parenthesesBalance;++i)
                    segmentStack.add(segment);
            }
            else if(segment.parenthesesBalance < 0) {
                if(segmentStack == null) {
                    // Nothing has been opened so far: all closings are unmatched
                    segment.parenthesesBalance = 0;
                    segment.hasErrors = true;
                }
                else {
                    int r = -segment.parenthesesBalance;
                    while(r > 0 && !segmentStack.isEmpty()) {
                        segmentStack.remove(segmentStack.size()-1);
                        --r;
                    }
                    if(r > 0) {
                        // r closings had no partner; drop them from the balance
                        segment.parenthesesBalance += r;
                        segment.hasErrors = true;
                    }
                }
            }
        if(segmentStack == null)
            return;
        // Whatever is left on the stack are openings that were never closed
        for(ModuleSegment segment : segmentStack) {
            --segment.parenthesesBalance;
            segment.hasErrors = true;
        }
        
        ArrayList<ModuleSegment> oldSegments = segments;
        segments = new ArrayList<ModuleSegment>(oldSegments.size());
        
        int currentBalance = 0;
        int begin = 0;
        boolean hasErrors = false;
        for(ModuleSegment segment : oldSegments) {
            if(currentBalance == 0) {
                if(segment.parenthesesBalance == 0)
                    segments.add(segment);
                else {
                    // Start of a run that is merged into one segment
                    begin = segment.begin;
                    currentBalance = segment.parenthesesBalance;
                    hasErrors = segment.hasErrors;
                }
            }
            else {
                currentBalance += segment.parenthesesBalance;
                hasErrors |= segment.hasErrors;
                if(currentBalance == 0)
                    segments.add(new ModuleSegment(begin, segment.end, 0, hasErrors));
            }
        }
    }
    
    /**
     * Splits the given source text into segments with balanced brackets.
     *
     * @param sourceText the text to split; must not be {@code null}
     * @return the segments in the order in which they occur in the text
     */
    public static List<ModuleSegment> split(String sourceText) {
        RobustModuleSplitter splitter = new RobustModuleSplitter(sourceText);
        splitter.split();
        splitter.combineByParenthesesBalance();
        return splitter.segments;
    }
}

