/*
 * Decompiled with CFR 0.152.
 */
package io.github.lnyocly.ai4j.utils;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Splits text into chunks by trying a list of separators in order.
 *
 * <p>The first configured separator that occurs in the text (or the empty
 * separator, which always matches) is used to cut the text into pieces.
 * Pieces shorter than {@code chunkSize} are merged back together, joined by
 * that separator, into chunks of at most {@code chunkSize} characters with up
 * to {@code chunkOverlap} characters carried over between consecutive chunks.
 * Pieces that are not shorter than {@code chunkSize} are split again
 * recursively.
 *
 * <p>Lengths are measured in UTF-16 {@code char} units
 * ({@link String#length()}).
 */
public class RecursiveCharacterTextSplitter {
    private static final Logger log = LoggerFactory.getLogger(RecursiveCharacterTextSplitter.class);

    /** Separators used when the caller supplies none. */
    private static final List<String> DEFAULT_SEPARATORS =
            Collections.unmodifiableList(Arrays.asList("\n\n", "\n", " ", ""));

    private final List<String> separators;
    private final int chunkSize;
    private final int chunkOverlap;

    /**
     * Creates a splitter with an explicit separator list.
     *
     * @param separators   separators to try, in order; {@code null} or an empty
     *                     list selects the defaults {@code "\n\n", "\n", " ", ""}.
     *                     Separators are matched literally, not as regular
     *                     expressions. The list is copied.
     * @param chunkSize    maximum length of a merged chunk
     * @param chunkOverlap maximum length carried over from the end of one chunk
     *                     to the start of the next
     */
    public RecursiveCharacterTextSplitter(List<String> separators, int chunkSize, int chunkOverlap) {
        // An empty list would make separators.get(size - 1) fail later, so it is
        // treated the same as "not specified".
        this.separators = separators == null || separators.isEmpty()
                ? DEFAULT_SEPARATORS
                : new ArrayList<String>(separators);
        this.chunkSize = chunkSize;
        this.chunkOverlap = chunkOverlap;
    }

    /**
     * Creates a splitter that uses the default separators
     * {@code "\n\n", "\n", " ", ""}.
     *
     * @param chunkSize    maximum length of a merged chunk
     * @param chunkOverlap maximum length carried over between consecutive chunks
     */
    public RecursiveCharacterTextSplitter(int chunkSize, int chunkOverlap) {
        this(null, chunkSize, chunkOverlap);
    }

    /**
     * Splits {@code text} into chunks.
     *
     * @param text the text to split; may be {@code null}
     * @return the chunks in document order; an empty list when {@code text} is
     *         {@code null} or empty. A piece that none of the separators can
     *         shorten is returned as a single chunk even if it is longer than
     *         {@code chunkSize}.
     */
    public List<String> splitText(String text) {
        List<String> finalChunks = new ArrayList<String>();
        if (text == null || text.isEmpty()) {
            return finalChunks;
        }

        // Fall back to the last separator when none of them occurs in the text.
        String separator = this.separators.get(this.separators.size() - 1);
        for (String candidate : this.separators) {
            if (candidate.isEmpty() || text.contains(candidate)) {
                separator = candidate;
                break;
            }
        }

        List<String> goodSplits = new ArrayList<String>();
        for (String piece : splitLiteral(text, separator)) {
            if (piece.length() < this.chunkSize) {
                goodSplits.add(piece);
                continue;
            }
            // Flush the small pieces collected so far to keep document order.
            if (!goodSplits.isEmpty()) {
                finalChunks.addAll(this.mergeSplits(goodSplits, separator));
                goodSplits.clear();
            }
            if (piece.length() == text.length()) {
                // The separator did not shorten the text, so recursing would see
                // the same input again and never terminate. Emit the piece as is.
                if (piece.length() > this.chunkSize) {
                    log.warn("Warning: Created a chunk of size {}, which is longer than the specified {}", piece.length(), this.chunkSize);
                }
                finalChunks.add(piece);
            } else {
                finalChunks.addAll(this.splitText(piece));
            }
        }
        if (!goodSplits.isEmpty()) {
            finalChunks.addAll(this.mergeSplits(goodSplits, separator));
        }
        return finalChunks;
    }

    /**
     * Cuts {@code text} at every literal occurrence of {@code separator}.
     * An empty separator splits the text into individual code points so that
     * surrogate pairs are never separated.
     */
    private static List<String> splitLiteral(String text, String separator) {
        if (separator.isEmpty()) {
            List<String> codePoints = new ArrayList<String>(text.length());
            int index = 0;
            while (index < text.length()) {
                int width = Character.charCount(text.codePointAt(index));
                codePoints.add(text.substring(index, index + width));
                index += width;
            }
            return codePoints;
        }
        // String.split takes a regular expression; quote the separator so that
        // characters such as '.' or '|' are matched literally.
        return Arrays.asList(text.split(Pattern.quote(separator)));
    }

    /**
     * Greedily joins consecutive pieces with {@code separator} into chunks no
     * longer than {@code chunkSize}, keeping trailing pieces of the previous
     * chunk (up to {@code chunkOverlap} characters) at the start of the next.
     *
     * @param splits    pieces to merge, in order
     * @param separator separator re-inserted between merged pieces
     * @return the merged, non-empty chunks
     */
    private List<String> mergeSplits(List<String> splits, String separator) {
        int separatorLen = separator.length();
        List<String> docs = new ArrayList<String>();
        List<String> currentDoc = new ArrayList<String>();
        // Length of currentDoc once joined: sum of piece lengths plus separators.
        int total = 0;
        for (String piece : splits) {
            int len = piece.length();
            if (total + len + (currentDoc.isEmpty() ? 0 : separatorLen) > this.chunkSize) {
                if (total > this.chunkSize) {
                    log.warn("Warning: Created a chunk of size {}, which is longer than the specified {}", total, this.chunkSize);
                }
                if (!currentDoc.isEmpty()) {
                    String doc = this.joinDocs(currentDoc, separator);
                    if (doc != null) {
                        docs.add(doc);
                    }
                    // Drop pieces from the front until what remains fits in the
                    // overlap budget and leaves room for the incoming piece. The
                    // emptiness check keeps a negative chunkOverlap from reading
                    // past the end of the list.
                    while (!currentDoc.isEmpty()
                            && (total > this.chunkOverlap
                                || (total + len + separatorLen > this.chunkSize && total > 0))) {
                        total -= currentDoc.get(0).length() + (currentDoc.size() > 1 ? separatorLen : 0);
                        currentDoc.remove(0);
                    }
                }
            }
            currentDoc.add(piece);
            total += len + (currentDoc.size() > 1 ? separatorLen : 0);
        }
        String doc = this.joinDocs(currentDoc, separator);
        if (doc != null) {
            docs.add(doc);
        }
        return docs;
    }

    /**
     * Joins {@code docs} with {@code separator}.
     *
     * @return the joined text, or {@code null} when there is nothing to join or
     *         the joined text is empty, so callers do not emit empty chunks
     */
    private String joinDocs(List<String> docs, String separator) {
        if (docs.isEmpty()) {
            return null;
        }
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < docs.size(); ++i) {
            if (i > 0) {
                sb.append(separator);
            }
            sb.append(docs.get(i));
        }
        return sb.length() == 0 ? null : sb.toString();
    }
}

