43 lines
1.5 KiB
Python
43 lines
1.5 KiB
Python
import os
|
|
from Sastrawi.Dictionary.ArrayDictionary import ArrayDictionary
|
|
from Sastrawi.Stemmer.Stemmer import Stemmer
|
|
from Sastrawi.Stemmer.CachedStemmer import CachedStemmer
|
|
from Sastrawi.Stemmer.Cache.ArrayCache import ArrayCache
|
|
|
|
class StemmerFactory(object):
    """Factory that builds a pre-configured stemmer.

    The stemmer's dictionary is loaded from ``data/kata-dasar.txt``, located
    relative to the directory that contains this module.
    """

    # Cache key associated with dictionary caching. Nothing in this class
    # reads it; it is kept as a public attribute for backward compatibility.
    APC_KEY = 'sastrawi_cache_dictionary'

    def create_stemmer(self, isDev=False):
        """Return a ``CachedStemmer`` wrapping a dictionary-backed ``Stemmer``.

        :param isDev: forwarded to :meth:`get_words`, which currently
            ignores it.
        :returns: a new ``CachedStemmer`` built from a new ``ArrayCache`` and
            a new ``Stemmer`` over the loaded word list.
        :raises RuntimeError: if the dictionary file is missing.
        """
        dictionary = ArrayDictionary(self.get_words(isDev))
        stemmer = Stemmer(dictionary)

        # Every call gets its own result cache alongside its own stemmer.
        return CachedStemmer(ArrayCache(), stemmer)

    def get_words(self, isDev=False):
        """Return the list of dictionary words.

        :param isDev: accepted for backward compatibility; it has no effect
            because the words are always read from the dictionary file.
        :returns: list of words, as produced by :meth:`get_words_from_file`.
        :raises RuntimeError: if the dictionary file is missing.
        """
        return self.get_words_from_file()

    def get_words_from_file(self):
        """Read the dictionary file and return one list entry per line.

        :returns: list of the lines of ``data/kata-dasar.txt`` with line
            terminators removed.
        :raises RuntimeError: if the dictionary file does not exist.
        """
        module_dir = os.path.dirname(os.path.realpath(__file__))
        dictionary_path = os.path.join(module_dir, 'data', 'kata-dasar.txt')

        if not os.path.isfile(dictionary_path):
            raise RuntimeError(
                'Dictionary file is missing. '
                'It seems that your installation is corrupted.'
            )

        with open(dictionary_path, 'r') as dictionary_file:
            content = dictionary_file.read()

        # splitlines(), unlike split('\n'), does not produce a spurious empty
        # entry for the newline that terminates the last word.
        return content.splitlines()