NIM_E31221299/scripts/venv/Lib/site-packages/Sastrawi/Stemmer/Context/Context.py

from Sastrawi.Stemmer.ConfixStripping.PrecedenceAdjustmentSpecification \
   import PrecedenceAdjustmentSpecification

class Context(object):
    """Stemming Context using Nazief and Adriani, CS, ECS, Improved ECS"""

    def __init__(self, original_word, dictionary, visitor_provider):
        self.original_word = original_word
        self.current_word = original_word
        self.dictionary = dictionary
        self.visitor_provider = visitor_provider

        self.process_is_stopped = False
        self.removals = []
        self.visitors = []
        self.suffix_visitors = []
        self.prefix_pisitors = []
        self.result = ''

        self.init_visitors()

    def init_visitors(self):
        self.visitors = self.visitor_provider.get_visitors()
        self.suffix_visitors = self.visitor_provider.get_suffix_visitors()
        self.prefix_pisitors = self.visitor_provider.get_prefix_visitors()

    def stopProcess(self):
        self.process_is_stopped = True

    def add_removal(self, removal):
        self.removals.append(removal)

    def execute(self):
        """Execute stemming process; the result can be retrieved with result"""

        #step 1 - 5
        self.start_stemming_process()

        #step 6
        if self.dictionary.contains(self.current_word):
            self.result = self.current_word
        else:
            self.result = self.original_word

    def start_stemming_process(self):

        #step 1
        if self.dictionary.contains(self.current_word):
            return
        self.accept_visitors(self.visitors)
        if self.dictionary.contains(self.current_word):
            return

        csPrecedenceAdjustmentSpecification = PrecedenceAdjustmentSpecification()

        #Confix Stripping
        #Try to remove prefix before suffix if the specification is met
        if csPrecedenceAdjustmentSpecification.is_satisfied_by(self.original_word):
            #step 4, 5
            self.remove_prefixes()
            if self.dictionary.contains(self.current_word):
                return

            #step 2, 3
            self.remove_suffixes()
            if self.dictionary.contains(self.current_word):
                return
            else:
                #if the trial is failed, restore the original word
                #and continue to normal rule precedence (suffix first, prefix afterwards)
                self.current_word = self.original_word
                self.removals = []

        #step 2, 3
        self.remove_suffixes()
        if self.dictionary.contains(self.current_word):
            return

        #step 4, 5
        self.remove_prefixes()
        if self.dictionary.contains(self.current_word):
            return

        #ECS loop pengembalian akhiran
        self.loop_pengembalian_akhiran()

    def remove_prefixes(self):
        for i in range(3):
            self.accept_prefix_visitors(self.prefix_pisitors)
            if self.dictionary.contains(self.current_word):
                return

    def remove_suffixes(self):
        self.accept_visitors(self.suffix_visitors)

    def accept(self, visitor):
        visitor.visit(self)

    def accept_visitors(self, visitors):
        for visitor in visitors:
            self.accept(visitor)
            if self.dictionary.contains(self.current_word):
                return self.current_word
            if self.process_is_stopped:
                return self.current_word

    def accept_prefix_visitors(self, visitors):
        removalCount = len(self.removals)
        for visitor in visitors:
            self.accept(visitor)
            if self.dictionary.contains(self.current_word):
                return self.current_word
            if self.process_is_stopped:
                return self.current_word
            if len(self.removals) > removalCount:
                return

    def loop_pengembalian_akhiran(self):
        """ECS Loop Pengembalian Akhiran"""
        self.restore_prefix()

        removals = self.removals
        reversed_removals = reversed(removals)
        current_word = self.current_word

        for removal in reversed_removals:
            if not self.is_suffix_removal(removal):
                continue
            if removal.get_removed_part() == 'kan':
                self.current_word = removal.result + 'k'

                #step 4,5
                self.remove_prefixes()
                if self.dictionary.contains(self.current_word):
                    return
                self.current_word = removal.result + 'kan'
            else:
                self.current_word = removal.get_subject()

            #step 4,5
            self.remove_prefixes()
            if self.dictionary.contains(self.current_word):
                return

            self.removals = removals
            self.current_word = current_word

    def is_suffix_removal(self, removal):
        """Check wether the removed part is a suffix"""
        return removal.get_affix_type() == 'DS' \
               or removal.get_affix_type() == 'PP' \
               or removal.get_affix_type() == 'P'

    def restore_prefix(self):
        """Restore prefix to proceed with ECS loop pengembalian akhiran"""
        for removal in self.removals:
            #return the word before precoding (the subject of first prefix removal)
            self.current_word = removal.get_subject()
            break

        for removal in self.removals:
            if removal.get_affix_type() == 'DP':
                self.removals.remove(removal)