// Single-character folds applied to NFKC-normalized text. An empty-string
// value means the character is removed.
const ARABIC_NORMALIZATION_MAP = Object.freeze({
    // Alef variations
    "\u0622": "\u0627", // آ → ا
    "\u0623": "\u0627", // أ → ا
    "\u0625": "\u0627", // إ → ا
    "\u0671": "\u0627", // ٱ → ا (Quranic Alef)
    // Teh variations
    "\u0629": "\u0647", // ة → ه
    // Hamza variations
    "\u0624": "\u0648", // ؤ → و
    "\u0626": "\u064A", // ئ → ي
    "\u0649": "\u064A", // ى → ي (Alif Maksura to Ya)

    // Arabic-Indic digits
    "\u0660": "0", // ٠ → 0
    "\u0661": "1", // ١ → 1
    "\u0662": "2", // ٢ → 2
    "\u0663": "3", // ٣ → 3
    "\u0664": "4", // ٤ → 4
    "\u0665": "5", // ٥ → 5
    "\u0666": "6", // ٦ → 6
    "\u0667": "7", // ٧ → 7
    "\u0668": "8", // ٨ → 8
    "\u0669": "9", // ٩ → 9

    // Superscript alef
    "\u0670": "", //  ٰ superscript alef → empty
    // Tatweel
    "\u0640": "", // ـ tatweel → empty
});

// Basic Arabic diacritics (fathatan .. sukun), all of which are removed.
const ARABIC_DIACRITICS_PATTERN = /[\u064B-\u0652]/g;

// Character class matching every key of the map. Built once at module load
// instead of on every call. The keys contain no regex metacharacters.
// eslint-disable-next-line no-misleading-character-class
const ARABIC_NORMALIZATION_PATTERN = new RegExp(`[${Object.keys(ARABIC_NORMALIZATION_MAP).join("")}]`, "g");

/**
 * Normalizes Arabic text by:
 * - Applying Unicode NFKC normalization (folds presentation forms, ligatures
 *   and decomposed sequences into their standard code points)
 * - Converting alef / hamza / teh marbuta / alif maksura variants to a
 *   single base letter
 * - Converting Arabic-Indic digits (U+0660–U+0669) to ASCII digits
 * - Removing tatweel, superscript alef and the basic diacritics
 *   (U+064B–U+0652)
 *
 * The returned string is in NFKC form and contains none of the mapped or
 * stripped characters.
 *
 * @param {string} str - The input Arabic string to be normalized.
 * @returns {string} - The normalized Arabic string. Non-string input is
 *   returned unchanged.
 */
export const normalizeString = (str) => {
    if (typeof str !== "string") return str;

    // NFKC must run BEFORE the character map: it is what turns decomposed
    // sequences (ا + combining madda) and presentation forms (U+FE83, lam-alef
    // ligatures, ...) into the code points the map knows about.
    //
    // Removing tatweel/diacritics can make a base letter and a combining mark
    // adjacent, which NFKC would then compose into a mapped letter again, so
    // the pass is repeated until the string is stable. Every extra pass only
    // happens after a composition shortened the string, so the loop terminates
    // (typically after one confirming pass).
    let previous;
    let current = str;
    do {
        previous = current;
        current = previous
            .normalize("NFKC")
            .replace(ARABIC_NORMALIZATION_PATTERN, (match) => ARABIC_NORMALIZATION_MAP[match])
            .replace(ARABIC_DIACRITICS_PATTERN, "");
    } while (current !== previous);

    return current;
};