import _ from "lodash";
import axios from "axios";
import {
  ModuleTextToSpeechAudioIndex,
  ModuleTextToSpeechLayerItem,
  ModuleTextToSpeechLayerTypes,
} from "@/models/module";
import { ref, computed, inject } from "vue";
import { audioState } from "./webgl";
import { useEvent } from "./event";
import { ProjectAvatar, ProjectAvatarVoice } from "@/models/project";
import { useUtils } from "./utils";
import {
  SlideCustomAnimations,
  SlidePronunciationsPhonemeTypes,
} from "@/models/slide";

/** Shape of a selected word/layer pair, shared by both selection refs below. */
type TextToSpeechSelection = {
  word: string;
  wordIndex: number;
  type: ModuleTextToSpeechLayerTypes;
};

// Everything below lives at module scope, so every `useTextToSpeech()` call
// reads and writes the very same refs.

/** Layers attached to individual words of the text. */
const layers = ref<ModuleTextToSpeechLayerItem[]>([]);
/** Text the voice is generated from. */
const text = ref("");
/** Set to `true` by `init` and back to `false` by `destroy`. */
const initialized = ref(false);
/** Exposed by the composable; not read or written anywhere else in this file. */
const showAnimationList = ref(false);
/** Exposed by the composable; not read or written anywhere else in this file. */
const updatedText = ref("");

/** SSML document sent with the most recent voice request. */
const ssml = ref("");
/** Copy of the last voice response, stored alongside `avatarVoice`. */
const avatarVoiceOriginal = ref<ProjectAvatarVoice | null>(null);
/** Last voice response; `makeAudioIndexes` reads its `timeIndexes`. */
const avatarVoice = ref<ProjectAvatarVoice | null>(null);
/** Per-word timing entries, shifted by the pause layers. */
const audioIndexes = ref<ModuleTextToSpeechAudioIndex[]>([]);
/** Snapshot of the timing entries taken before pauses are re-applied. */
const audioIndexesWithoutPauses = ref<ModuleTextToSpeechAudioIndex[]>([]);

/**
 * Running total of pause durations removed from the voice timestamps by
 * `makeAudioIndexes` (divided by 1000 when combined with the audio duration).
 */
const timeToSubtract = ref(0);

/** Selection made on the timeline itself. */
const currentSelection = ref<TextToSpeechSelection | null>(null);

/** Selection made through the context menu. */
const contextMenuCurrentSelection = ref<TextToSpeechSelection | null>(null);

/** Layers sorted by ascending `wordIndex`, then bucketed by their `type`. */
const layersByType = computed(() => {
  const sortedLayers = _.orderBy(layers.value, "wordIndex", "asc");
  return _.groupBy(sortedLayers, "type");
});

/**
 * Audio duration with pauses taken into account: the audio state's duration,
 * minus `timeToSubtract / 1000`, plus the sum of every pause layer's
 * before/after durations divided by 1000.
 */
const durationWithPauses = computed(() => {
  const pausesTotal = _.reduce(
    layersByType.value.pause,
    (total, layer) => {
      const pause = layer.meta.pause;
      if (!pause) return total;
      return total + (pause.durationBefore + pause.durationAfter);
    },
    0
  );

  const subtracted = timeToSubtract.value / 1000;
  const added = pausesTotal / 1000;
  return audioState.value.duration - subtracted + added;
});

/** Layer matching the current selection's word index and type, if any. */
const currentSelectionLayer = computed(() => {
  const selection = currentSelection.value;
  if (!selection) return undefined;

  return _.find(
    layers.value,
    (candidate) =>
      candidate.wordIndex === selection.wordIndex &&
      candidate.type === selection.type
  );
});

// Cancel-token source created by `getVoice`; `stopVoiceRequest` calls its
// `cancel()` when it is set.
let source: any = null;

export const useTextToSpeech = () => {
  const events = inject("events") as EventsEnum;
  const endpoints = inject("endpoints") as EndpointsEnum;
  const eventComposable = useEvent();
  const utilsComposable = useUtils();

  /**
   * Loads the given layers (deep-cloned) and text into the shared state,
   * flags the composable as initialized and emits
   * `TEXT_TO_SPEECH_INITIALIZED`.
   *
   * @param _text Text to synthesize.
   * @param _layers Layers to start from; the caller's array is not kept.
   */
  function init(_text: string, _layers: Array<ModuleTextToSpeechLayerItem>) {
    layers.value = _.cloneDeep(_layers);
    text.value = _text;
    initialized.value = true;

    const { TEXT_TO_SPEECH_INITIALIZED } = events;
    eventComposable.emit(TEXT_TO_SPEECH_INITIALIZED);
  }

  /**
   * Cancels any pending voice request and resets the shared state: layers,
   * text, timeline selection, pause offset, audio indexes, initialized flag,
   * both voice copies and the cached SSML.
   */
  function destroy() {
    if (source) source.cancel();

    layers.value = [];
    text.value = "";
    currentSelection.value = null;

    timeToSubtract.value = 0;
    audioIndexes.value = [];
    initialized.value = false;

    avatarVoice.value = null;
    avatarVoiceOriginal.value = null;
    ssml.value = "";
  }

  /** Cancels the voice request tracked in `source`, when there is one. */
  function stopVoiceRequest() {
    if (!source) return;
    source.cancel();
  }

  /**
   * Replaces the shared layers with a deep copy of `_layers`, so later edits
   * to the stored layers never touch the caller's objects.
   */
  function setLayers(_layers: Array<ModuleTextToSpeechLayerItem>) {
    const detachedCopy = _.cloneDeep(_layers);
    layers.value = detachedCopy;
  }

  /** Stores `_text` as the text to synthesize. */
  function setText(_text: string) {
    text.value = _text;
  }

  /**
   * Looks up the first layer of the given type on the given word.
   *
   * @returns The matching layer, or `undefined` when there is none.
   */
  function getLayer(_wordIndex: number, _type: ModuleTextToSpeechLayerTypes) {
    for (const candidate of layers.value) {
      if (candidate.wordIndex === _wordIndex && candidate.type === _type) {
        return candidate;
      }
    }
    return undefined;
  }

  /** Returns every layer, of any type, attached to the given word. */
  function getLayersByWordIndex(_wordIndex: number) {
    const onSameWord = (candidate: { wordIndex: number }) =>
      candidate.wordIndex === _wordIndex;
    return layers.value.filter(onSameWord);
  }

  /**
   * Adds a layer, or updates the one already present for the same word and
   * type.
   *
   * @param _layer Layer to add.
   * @param _force When `true`, always append `_layer`, even if a layer with
   *   the same word index and type already exists.
   */
  function addLayer(_layer: ModuleTextToSpeechLayerItem, _force = false) {
    const existing = getLayer(_layer.wordIndex, _layer.type);

    if (!existing || _force) {
      layers.value.push(_layer);
      return;
    }

    // Update in place so references to the existing layer stay valid.
    existing.meta = _layer.meta;
    existing.word = _layer.word;
  }

  /** Drops every layer whose type is `layerType`. */
  function clearLayersByType(layerType: ModuleTextToSpeechLayerTypes) {
    layers.value = _.reject(
      layers.value,
      (candidate) => candidate.type === layerType
    );
  }

  /**
   * Removes layers attached to a word.
   *
   * @param _wordIndex Word whose layers are removed.
   * @param _type When given, only the layer of this type is removed;
   *   otherwise every layer on the word goes.
   */
  function removeLayer(
    _wordIndex: number,
    _type?: ModuleTextToSpeechLayerTypes
  ) {
    layers.value = _.reject(layers.value, (candidate) => {
      if (candidate.wordIndex != _wordIndex) return false;
      return _type ? candidate.type == _type : true;
    });
  }

  /**
   * Rebuilds `audioIndexes` (one timing entry per spoken word) from
   * `avatarVoice.timeIndexes` and the current pause layers.
   *
   * Two phases:
   * 1. Obtain pause-free timings. When `audioIndexes` is empty they are
   *    computed from the voice response (and cached in
   *    `audioIndexesWithoutPauses`); otherwise the cached copy is reused.
   * 2. Shift every word by the pauses of the words before it and refresh
   *    each word's `typesAvailable`.
   *
   * Clears `audioIndexes` and returns early when no voice is loaded.
   */
  function makeAudioIndexes() {
    if (!avatarVoice.value) {
      audioIndexes.value = [];
      return;
    }

    const arr: Array<ModuleTextToSpeechAudioIndex> = [];

    // Pause layer stored at the index one past the last word, if any.
    const endPause = getLayer(avatarVoice.value.timeIndexes.length, "pause");

    if (!audioIndexes.value.length) {
      // Full rebuild: `timeToSubtract` is only recomputed on this path.
      timeToSubtract.value = 0;
      _.forEach(avatarVoice.value.timeIndexes, (word, wordIndex) => {
        const obj = {
          wordIndex,
          time: word.time,
          word: word.value,
          typesAvailable: [],
        };

        // Accumulate this word's leading pause...
        const layer = getLayer(wordIndex, "pause");
        if (layer && layer.meta.pause) {
          timeToSubtract.value += layer.meta.pause.durationBefore;
        }

        // ...and the trailing pause of the word before it.
        const previousLayer = getLayer(wordIndex - 1, "pause");
        if (previousLayer && previousLayer.meta.pause) {
          timeToSubtract.value += previousLayer.meta.pause.durationAfter;
        }

        // Remove the running pause total from the voice timestamp.
        obj.time -= timeToSubtract.value;

        arr.push(obj);
      });

      // A trailing pause on the last word is spread across the entries: every
      // entry with a non-zero wordIndex receives an equal share, and the last
      // entry additionally receives whatever is left of `durationAfter`.
      const lastLayer = getLayer(arr.length - 1, "pause");
      if (lastLayer && lastLayer.meta.pause) {
        const { durationAfter } = lastLayer.meta.pause;
        const toAdd = durationAfter / arr.length;
        let sum = 0;
        _.forEach(arr, (item) => {
          if (item.wordIndex) {
            sum += toAdd;
            item.time += toAdd;
          }
        });

        arr[arr.length - 1].time += durationAfter - sum;
      }

      // Cache deep copies so later calls can skip the rebuild above.
      audioIndexesWithoutPauses.value = [];
      _.forEach(arr, (obj) => {
        audioIndexesWithoutPauses.value.push(_.cloneDeep(obj));
      });
    } else {
      // Reuse the cached pause-free timings (cloned so the cache stays intact).
      _.forEach(audioIndexesWithoutPauses.value, (obj) => {
        arr.push(_.cloneDeep(obj));
      });
    }

    let toAdd = 0;

    // Re-apply pauses: a word is shifted by the pauses of all earlier words;
    // its own pause is added to the offset only after its time is set.
    _.forEach(arr, (word) => {
      word.time += toAdd;
      const layer = getLayer(word.wordIndex, "pause");
      if (layer && layer.meta.pause) {
        toAdd +=
          layer.meta.pause.durationBefore + layer.meta.pause.durationAfter;
      }
    });

    audioIndexes.value = arr;

    _.forEach(audioIndexes.value, (word) => {
      word.typesAvailable = getWordIndexAvailableTypes(word.wordIndex);
      // The word right before the end pause may not receive a pause layer.
      if (endPause && endPause.wordIndex - 1 === word.wordIndex) {
        word.typesAvailable = _.filter(
          word.typesAvailable,
          (type) => type != "pause"
        );
      }
    });
  }

  /**
   * Lists the layer types that can still be added to a word: every known type
   * not already used on that word, minus `"animation"` when an animation
   * would not fit between its neighbouring animations.
   */
  function getWordIndexAvailableTypes(_wordIndex: number) {
    const knownTypes: Array<ModuleTextToSpeechLayerTypes> = [
      "pause",
      "pronunciation",
      "animation",
      "media",
    ];

    const usedTypes = layers.value
      .filter((candidate) => candidate.wordIndex === _wordIndex)
      .map((candidate) => candidate.type);

    const freeTypes = knownTypes.filter(
      (candidate) => !usedTypes.includes(candidate)
    );

    const animationBlocked =
      freeTypes.includes("animation") &&
      !checkLayerFitsBetween(_wordIndex, "animation");

    return animationBlocked
      ? freeTypes.filter((candidate) => candidate != "animation")
      : freeTypes;
  }

  /**
   * Walks the pause and media layers in word order and, for each media layer,
   * recomputes `meta.media.timelineDuration`: the time available until the
   * next media layer (or the end of the audio), minus the word's leading
   * pause, capped at the media's own duration. A media layer without a
   * duration is given the full audio duration first.
   */
  function checkLayersDurations() {
    for (const type of ["pause", "media"]) {
      const group = layersByType.value[type];

      _.forEach(group, (_layer, index) => {
        const following = group[index + 1];
        const limitSec = following
          ? getLayerStartTime(following.wordIndex, type === "media")
          : durationWithPauses.value;
        const startSec = getLayerStartTime(_layer.wordIndex);

        let max = (limitSec - startSec) * 1000;

        const media = _layer.meta.media;
        if (type != "media" || !media) return;

        // The word's own leading pause eats into the available time.
        const layerPause = getLayer(_layer.wordIndex, "pause");
        if (layerPause && layerPause.meta.pause && _layer.type !== "pause") {
          max -= layerPause.meta.pause.durationBefore;
        }

        if (!media.duration) {
          media.duration = durationWithPauses.value * 1000;
        } else if (max > media.duration) {
          max = media.duration;
        }

        media.timelineDuration = max;
      });
    }
  }

  /** Returns the timing entry of the given word, or `undefined`. */
  function getAudioIndex(_wordIndex: number) {
    for (const entry of audioIndexes.value) {
      if (entry.wordIndex == _wordIndex) return entry;
    }
    return undefined;
  }

  /**
   * Start time of a word: its audio-index time divided by 1000.
   *
   * @param _wordIndex Word to look up; an unknown word starts at 0.
   * @param _addPausePauseDuration Also add the word's pause `durationBefore`.
   * @param _addAnimationOffset Also add the word's animation `offset`.
   */
  function getLayerStartTime(
    _wordIndex: number,
    _addPausePauseDuration = false,
    _addAnimationOffset = false
  ) {
    const entry = getAudioIndex(_wordIndex);
    let total = entry ? entry.time : 0;

    if (_addPausePauseDuration) {
      const pauseLayer = getLayer(_wordIndex, "pause");
      const pause = pauseLayer && pauseLayer.meta.pause;
      if (pause) total += pause.durationBefore;
    }

    if (_addAnimationOffset) {
      const animationLayer = getLayer(_wordIndex, "animation");
      const animation = animationLayer && animationLayer.meta.animation;
      if (animation) total += animation.offset;
    }

    return total / 1000;
  }

  /**
   * Checks that an animation placed on `_wordIndex` overlaps neither the next
   * nor the previous animation layer on the timeline.
   *
   * Each side uses its own duration: the checked layer's duration against the
   * next animation's start, and the previous animation's duration against the
   * checked layer's start. (Previously one shared variable was used for both,
   * so a previous animation without a duration inherited the checked layer's
   * duration.)
   *
   * @param _wordIndex Word the layer sits on (or would sit on).
   * @param _type Type used to look the layer up when `_layerToCheck` is
   *   omitted.
   * @param _layerToCheck Layer being validated. When given, the animation
   *   layer on its own word index is ignored as a neighbour.
   * @returns `true` when the layer fits, `false` when it would overlap.
   */
  function checkLayerFitsBetween(
    _wordIndex: number,
    _type: ModuleTextToSpeechLayerTypes,
    _layerToCheck?: ModuleTextToSpeechLayerItem
  ) {
    const startsAt = getLayerStartTime(_wordIndex, false, true);

    // Animation duration divided by 1000; 0 when the layer has none.
    const animationDuration = (layer?: ModuleTextToSpeechLayerItem) =>
      layer && layer.meta.animation && layer.meta.animation.duration
        ? layer.meta.animation.duration / 1000
        : 0;

    const otherAnimations = _.filter(
      layers.value,
      (_layer) =>
        _layer.type === "animation" &&
        _wordIndex != _layer.wordIndex &&
        (_layerToCheck ? _layer.wordIndex != _layerToCheck.wordIndex : true)
    );

    // Closest animation after the word.
    const nextLayer = _.minBy(
      _.filter(otherAnimations, (_layer) => _layer.wordIndex > _wordIndex),
      "wordIndex"
    );

    if (nextLayer) {
      const layerToCheck = _layerToCheck || getLayer(_wordIndex, _type);
      const endsAt = startsAt + animationDuration(layerToCheck);
      const nextStartsAt = getLayerStartTime(nextLayer.wordIndex, false, true);
      if (endsAt > nextStartsAt) return false;
    }

    // Closest animation before the word.
    const previousLayer = _.maxBy(
      _.filter(otherAnimations, (_layer) => _layer.wordIndex < _wordIndex),
      "wordIndex"
    );

    if (previousLayer) {
      const previousEndsAt =
        getLayerStartTime(previousLayer.wordIndex, false, true) +
        animationDuration(previousLayer);
      if (previousEndsAt > startsAt) return false;
    }

    return true;
  }

  /**
   * Stores a selection (or clears it with `null`).
   *
   * @param obj Selected word/layer pair, or `null`.
   * @param fromContextMenu When `true`, the context-menu selection is
   *   written instead of the timeline selection.
   */
  function setSelected(
    obj: {
      word: string;
      wordIndex: number;
      type: ModuleTextToSpeechLayerTypes;
    } | null,
    fromContextMenu = false
  ) {
    const target = fromContextMenu
      ? contextMenuCurrentSelection
      : currentSelection;
    target.value = obj;
  }

  /**
   * Converts a time span into a percentage of `durationWithPauses`.
   *
   * @param seconds Span to convert, in the same unit as `durationWithPauses`.
   * @returns The percentage, never negative. Returns 0 when the result is not
   *   a finite number (e.g. the total duration is still 0), so callers never
   *   build `NaN%` / `Infinity%` style values.
   */
  function secondsToPercentage(seconds: number) {
    const percentage = (seconds * 100) / durationWithPauses.value;
    if (!Number.isFinite(percentage) || percentage < 0) return 0;
    return percentage;
  }

  /**
   * Removes the layer behind the active selection and clears the timeline
   * selection. Does nothing when there is no selection.
   *
   * @param fromContextMenu Use the context-menu selection instead of the
   *   layer matching the timeline selection.
   */
  function removeCurrentSelection(fromContextMenu = false) {
    const target = fromContextMenu
      ? contextMenuCurrentSelection.value
      : currentSelectionLayer.value;

    if (target) {
      // Read both fields before the selection is cleared.
      const targetWordIndex = target.wordIndex;
      const targetType = target.type;
      setSelected(null);
      removeLayer(targetWordIndex, targetType);
    }
  }

  /** Emits `TIMELINE_CALC_GENERAL_VALUES` on the shared event bus. */
  function timelineCalcGeneralValues() {
    const { TIMELINE_CALC_GENERAL_VALUES } = events;
    eventComposable.emit(TIMELINE_CALC_GENERAL_VALUES);
  }

  /**
   * Builds the style of the timeline column at position `index` in
   * `audioIndexes`.
   *
   * The width is the share of the total duration between this word and the
   * next one (or the end of the audio); the paddings are the shares taken by
   * the word's pause before/after durations. All values are 0 when there is
   * no entry at `index`.
   *
   * @returns CSS percentage strings plus the raw numbers in `originalValues`.
   */
  function getGridColStyle(index: number) {
    const originalValues = { paddingLeft: 0, paddingRight: 0, width: 0 };

    const entry = audioIndexes.value.length
      ? audioIndexes.value[index]
      : undefined;

    if (entry) {
      const startsAt = entry.time / 1000;
      const following = audioIndexes.value[index + 1];
      const endsAt = following
        ? following.time / 1000
        : durationWithPauses.value;

      originalValues.width = secondsToPercentage(endsAt - startsAt);

      const pauseLayer = getLayer(entry.wordIndex, "pause");
      const pause = pauseLayer && pauseLayer.meta.pause;
      if (pause) {
        originalValues.paddingLeft = secondsToPercentage(
          pause.durationBefore / 1000
        );
        originalValues.paddingRight = secondsToPercentage(
          pause.durationAfter / 1000
        );
      }
    }

    return {
      paddingLeft: `${originalValues.paddingLeft}%`,
      paddingRight: `${originalValues.paddingRight}%`,
      width: `${originalValues.width}%`,
      originalValues,
    };
  }

  /**
   * Tells whether a pause layer sits on a word that occurs more than once in
   * the voice response (on the repeated occurrence or on an earlier one).
   * `generateSSML` locates pauses by word text, so such a pause may have been
   * attached to the wrong occurrence.
   */
  function hasPauseOnRepeatedWord(voice: ProjectAvatarVoice): boolean {
    const seen: Array<{ value: string; index: number }> = [];

    for (let wordIndex = 0; wordIndex < voice.timeIndexes.length; wordIndex++) {
      const { value } = voice.timeIndexes[wordIndex];
      const earlierIndexes = seen
        .filter((word) => word.value === value)
        .map((word) => word.index);

      if (earlierIndexes.length) {
        const pauseOnEarlier = layers.value.some(
          (layer) =>
            layer.type === "pause" && earlierIndexes.includes(layer.wordIndex)
        );
        if (pauseOnEarlier || !!getLayer(wordIndex, "pause")) return true;
      }

      seen.push({ value, index: wordIndex });
    }

    return false;
  }

  /**
   * Requests the avatar voice for the current text and layers and stores it
   * in `avatarVoice` / `avatarVoiceOriginal`.
   *
   * Nothing is requested when the text is empty, when no avatar is given, or
   * when the generated SSML equals the cached one and `force` is `false`.
   *
   * If the response shows a pause on a repeated word, the SSML is rebuilt
   * from the response's time indexes and the voice is requested again. The
   * retry forwards `customAnimations` as well (it used to drop them).
   *
   * Time indexes are rebased so the first word starts at the first word's
   * leading pause (or at 0). `TEXT_TO_SPEECH_RENDER_LAYERS` is emitted only
   * when the text did not change while the request was in flight.
   *
   * NOTE(review): `ssml` is updated before the request, so after a failed or
   * cancelled request an unforced call with the same text returns early —
   * confirm whether callers always pass `force` in that case.
   *
   * @param force Request even when the SSML is unchanged.
   * @param avatar Avatar whose voice settings are used.
   * @param customAnimations Custom animations forwarded to the voice service.
   * @throws Whatever `getVoice` rejects with (including cancellation).
   */
  async function getAvatarVoice(
    force = false,
    avatar?: ProjectAvatar,
    customAnimations?: Array<SlideCustomAnimations>
  ): Promise<void> {
    if (!text.value) return;

    const requestedText = text.value;

    const newSSML = generateSSML(text.value, layers.value);
    if ((newSSML === ssml.value && !force) || !avatar) return;
    ssml.value = newSSML;

    let response = await getVoice(
      avatar,
      text.value,
      ssml.value,
      false,
      customAnimations
    );

    if (hasPauseOnRepeatedWord(response)) {
      ssml.value = generateSSMLFromTimeIndexes(response);
      // `_ignoreCache` stays unset here, exactly as the retry always sent it.
      response = await getVoice(
        avatar,
        text.value,
        ssml.value,
        undefined,
        customAnimations
      );
    }

    // Rebase the timestamps on the first word, keeping its leading pause.
    let toReduce = response.timeIndexes.length
      ? response.timeIndexes[0].time
      : 0;
    const firstIndexPause = getLayer(0, "pause");
    if (firstIndexPause && firstIndexPause.meta.pause) {
      toReduce -= firstIndexPause.meta.pause.durationBefore;
    }
    _.forEach(response.timeIndexes, (item) => {
      item.time -= toReduce;
    });

    avatarVoiceOriginal.value = _.cloneDeep(response);
    avatarVoice.value = _.cloneDeep(response);

    if (requestedText === text.value) {
      eventComposable.emit(events.TEXT_TO_SPEECH_RENDER_LAYERS);
    }
  }

  /**
   * Posts a text-to-speech request for the given avatar and resolves with the
   * voice payload.
   *
   * The request's cancel source is published in `source` so
   * `stopVoiceRequest` can cancel it. It is cleared once the request settles
   * (success or failure), but only if `source` still belongs to this request,
   * so a request that finishes late cannot wipe the handle of a newer one.
   *
   * @param _avatar Avatar providing voice service, voice, pitch and rate; the
   *   archetype is the part of `prefabName` before the first underscore.
   * @param _text Text to synthesize.
   * @param _ssml SSML sent as `Pronunciation`.
   * @param _ignoreCache Sent as `IgnoreCache`.
   * @param _customAnimations Sent as `CustomAnimations` (empty list when
   *   omitted).
   * @returns Promise of the voice; rejects with the request error, including
   *   cancellation.
   */
  function getVoice(
    _avatar: ProjectAvatar,
    _text: string,
    _ssml?: string | null,
    _ignoreCache?: boolean,
    _customAnimations?: Array<SlideCustomAnimations>
  ) {
    // Extracting Archetype from PrefabName
    const archetype = _avatar.prefabName.split("_")[0];

    const requestSource = axios.CancelToken.source();
    source = requestSource;

    return axios
      .post(
        endpoints.AVATARS.TEXT_SPEECH,
        {
          VoiceService: _avatar.voiceService,
          Voice: _avatar.voice,
          Text: utilsComposable.replaceTextSpaces(_text),
          Pronunciation: _ssml,
          Format: "mp3",
          addSpectrumData: true,
          Pitch: _avatar.pitch,
          Rate: _avatar.rate,
          IgnoreCache: _ignoreCache,
          CustomAnimations: _customAnimations || [],
          Archetype: archetype,
        },
        { cancelToken: requestSource.token }
      )
      .then(
        // NOTE(review): reads `payload` rather than axios' `data` — presumably
        // a response interceptor reshapes the response; confirm.
        (response: any) => response.payload as ProjectAvatarVoice
      )
      .finally(() => {
        if (source === requestSource) source = null;
      });
  }

  /**
   * Builds the SSML document by walking the words of a voice response in
   * order, so pauses are matched by word position rather than by word text.
   *
   * For each word found in the not-yet-consumed text, the text up to the end
   * of the word is cut off, the word is wrapped in a phoneme/sub tag when a
   * pronunciation layer exists for that word text, and break tags are placed
   * around the cut piece when a pause layer exists at the word's index.
   * Words not found in the remaining text are skipped.
   *
   * @param avatarVoice Voice response whose `timeIndexes` drive the walk.
   * @returns The text with tags inserted, wrapped in a `<speak>` element.
   */
  function generateSSMLFromTimeIndexes(avatarVoice: ProjectAvatarVoice) {
    const pieces: string[] = [];
    let remaining = text.value;

    _.forEach(avatarVoice.timeIndexes, (word, index) => {
      const startAt = remaining.indexOf(word.value);
      if (startAt < 0) return;

      // Consume everything up to and including this word.
      const endAt = startAt + word.value.length;
      let piece = remaining.slice(0, endAt);
      remaining = remaining.slice(endAt);

      const pronunciationLayer = _.find(
        layers.value,
        (layer) => layer.type === "pronunciation" && layer.word === word.value
      );
      const pronunciation =
        pronunciationLayer && pronunciationLayer.meta.pronunciation;

      if (pronunciation) {
        const tag = pronunciation.phonemeType
          ? generateSsmlPhonemeTag(
              word.value,
              pronunciation.value,
              pronunciation.phonemeType
            )
          : generateSsmlSubTag(word.value, pronunciation.value);
        piece = wordReplace(piece, word.value, tag);
      }

      const pauseLayer = getLayer(index, "pause");
      const pause = pauseLayer && pauseLayer.meta.pause;

      if (pause) {
        if (pause.durationBefore) {
          piece = generateSsmlBreakTag(pause.durationBefore) + piece;
        }
        if (pause.durationAfter) {
          piece = piece + generateSsmlBreakTag(pause.durationAfter);
        }
      }

      pieces.push(piece);
    });

    // Whatever text follows the last matched word is appended untouched.
    return generateSsmlSpeakTag(pieces.join("") + remaining);
  }

  /**
   * Builds the SSML document for `text` from pause and pronunciation layers.
   *
   * Pauses are applied first, in word order: for each pause layer the first
   * stand-alone occurrence of its word in the not-yet-consumed text is
   * located, the text up to the end of that word is consumed, and break tags
   * are placed around the word. Pronunciations are applied afterwards to
   * every stand-alone occurrence of their word, going through temporary
   * placeholders so one pronunciation cannot rewrite another one's tag.
   *
   * Fixes over the previous implementation:
   * - When the first match was embedded in a longer word (e.g. "cat" in
   *   "concat"), the consumed text and the emitted text got out of sync and
   *   part of the text was duplicated in the output. The cursor now always
   *   advances by exactly what was emitted.
   * - The embedded-word check also covers matches at the very start or end
   *   of the text.
   * - Placeholders can no longer be empty, and tags are restored literally
   *   (`$` sequences in a tag are not treated as replacement patterns).
   *
   * Non-breaking spaces in `text` are converted to regular spaces.
   *
   * @param text Text to annotate.
   * @param layers Layers to apply; only `pause` and `pronunciation` are used.
   * @returns The annotated text wrapped in a `<speak>` element.
   */
  function generateSSML(
    text: string,
    layers: Array<ModuleTextToSpeechLayerItem>
  ) {
    let output = "";
    let remaining = text.replace(/\xA0/g, " ");

    const pronunciations = _.filter(layers, (l) => l.type === "pronunciation");
    const pauses = _.filter(layers, (l) => l.type === "pause");

    const isLetter = (char: string | undefined) =>
      char !== undefined && /^[a-zA-Z]$/.test(char);

    // First occurrence of `word` that is not glued to a letter on either
    // side; -1 when there is none.
    const findStandaloneIndex = (source: string, word: string) => {
      let index = source.indexOf(word);
      while (
        word.length > 0 &&
        index >= 0 &&
        (isLetter(source[index - 1]) || isLetter(source[index + word.length]))
      ) {
        index = source.indexOf(word, index + 1);
      }
      return index;
    };

    _.forEach(_.orderBy(pauses, "wordIndex", "asc"), (layer) => {
      const startAt = findStandaloneIndex(remaining, layer.word);
      if (startAt < 0) return;

      // Emit and consume the very same span so nothing is lost or repeated.
      const endAt = startAt + layer.word.length;
      let chunk = remaining.substring(0, endAt);
      remaining = remaining.substring(endAt);

      if (layer.meta.pause) {
        const { durationBefore, durationAfter } = layer.meta.pause;
        let breakTag = layer.word;

        if (durationBefore) {
          breakTag = `${generateSsmlBreakTag(durationBefore)}${breakTag}`;
        }

        if (durationAfter) {
          breakTag = `${breakTag}${generateSsmlBreakTag(durationAfter)}`;
        }

        chunk = wordReplace(chunk, layer.word, breakTag);
      }

      output += chunk;
    });

    output += remaining;

    const placeholders: Array<{ token: string; tag: string }> = [];

    _.forEach(pronunciations, (layer, index) => {
      const pronunciation = layer.meta.pronunciation;
      if (!pronunciation) return;

      // Word-character-only token: never empty, and the "<index>x" prefix
      // keeps one token from being a prefix of another.
      const token = `ssmlph${index}x${Math.random().toString(36).slice(2)}`;
      const tag = pronunciation.phonemeType
        ? generateSsmlPhonemeTag(
            layer.word,
            pronunciation.value,
            pronunciation.phonemeType
          )
        : generateSsmlSubTag(layer.word, pronunciation.value);

      placeholders.push({ token, tag });
      output = wordReplace(output, layer.word, token);
    });

    _.forEach(placeholders, ({ token, tag }) => {
      // split/join inserts the tag verbatim.
      output = output.split(token).join(tag);
    });

    return generateSsmlSpeakTag(output);
  }

  /**
   * Escapes a value for use inside a double-quoted SSML (XML) attribute.
   * `&`, `<` and `"` would otherwise end or corrupt the attribute; all three
   * are valid X-SAMPA symbols, so phoneme strings can contain them.
   */
  function escapeSsmlAttribute(value: string) {
    return value
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/"/g, "&quot;");
  }

  /**
   * Builds a `<sub>` tag that makes `word` be spoken as `pronunciation`.
   *
   * @param word Text kept as the element content (inserted as-is).
   * @param pronunciation Alias to speak; escaped for the attribute.
   */
  function generateSsmlSubTag(word: string, pronunciation: string) {
    return `<sub alias="${escapeSsmlAttribute(pronunciation)}">${word}</sub>`;
  }

  /**
   * Builds a `<break>` tag.
   *
   * @param time Pause length, written with an `ms` suffix.
   */
  function generateSsmlBreakTag(time: number) {
    return `<break time="${time}ms"/>`;
  }

  /**
   * Builds a `<phoneme>` tag for `word`.
   *
   * @param word Text kept as the element content (inserted as-is).
   * @param phoneme Phonetic transcription; escaped for the attribute.
   * @param type Phonetic alphabet; written lower-cased.
   */
  function generateSsmlPhonemeTag(
    word: string,
    phoneme: string,
    type: SlidePronunciationsPhonemeTypes
  ) {
    const alphabet = type.toLowerCase();
    return `<phoneme alphabet="${alphabet}" ph="${escapeSsmlAttribute(
      phoneme
    )}">${word}</phoneme>`;
  }

  /**
   * Wraps already-built SSML content in the root `<speak>` element, declaring
   * the W3C speech-synthesis namespace.
   *
   * @param tags Content placed inside the element, inserted as-is.
   */
  function generateSsmlSpeakTag(tags: string) {
    const opening = '<speak xmlns="http://www.w3.org/2001/10/synthesis">';
    const closing = "</speak>";
    return opening + tags + closing;
  }

  /**
   * Replaces every stand-alone occurrence of `oldValue` in `text` with
   * `newValue`. An occurrence is stand-alone when it is neither preceded nor
   * followed by a word character (letter, digit or underscore).
   *
   * `oldValue` is matched literally: regex metacharacters are escaped exactly
   * once. (`$` used to be escaped twice, which produced a pattern that could
   * never match, so words such as "$100" were silently left alone.)
   * `newValue` is inserted verbatim: sequences such as `$&` or `$1` are not
   * treated as replacement patterns.
   *
   * @param text Text to search.
   * @param oldValue Word to look for.
   * @param newValue Text to put in its place.
   * @returns The text with all stand-alone occurrences replaced.
   */
  function wordReplace(text: string, oldValue: string, newValue: string) {
    const escapedOldValue = oldValue.replace(/[$()*+.[\]?\\^{|}]/g, "\\$&");
    const re = new RegExp(`(?<![\\w\\d])${escapedOldValue}(?![\\w\\d])`, "g");
    // A replacer function bypasses replacement-pattern expansion.
    return text.replace(re, () => newValue);
  }

  return {
    init,
    destroy,
    setLayers,
    setText,
    getLayer,
    addLayer,
    removeLayer,
    makeAudioIndexes,
    checkLayersDurations,
    getLayerStartTime,
    checkLayerFitsBetween,
    setSelected,
    getAudioIndex,
    secondsToPercentage,
    removeCurrentSelection,
    timelineCalcGeneralValues,
    clearLayersByType,
    getGridColStyle,
    getAvatarVoice,
    generateSSML,
    getVoice,
    getLayersByWordIndex,
    stopVoiceRequest,
    generateSsmlSubTag,
    generateSsmlBreakTag,
    generateSsmlPhonemeTag,
    generateSsmlSpeakTag,
    wordReplace,
    layers,
    text,
    updatedText,
    durationWithPauses,
    currentSelection,
    layersByType,
    audioIndexes,
    currentSelectionLayer,
    contextMenuCurrentSelection,
    avatarVoiceOriginal,
    avatarVoice,
    initialized,
    ssml,
    showAnimationList,
  };
};
