258 lines
8.8 KiB
Python
258 lines
8.8 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""Command-line tool interface to generate word clouds.
|
|
"""
|
|
from __future__ import absolute_import
|
|
|
|
import sys
|
|
import textwrap
|
|
|
|
if __name__ == '__main__':  # pragma: no cover
    # Direct execution of this file is not the supported way to start the
    # CLI; exit immediately with a message naming the two entry points.
    _usage_hint = textwrap.dedent(
        """
        To execute the CLI, instead consider running:

          wordcloud_cli --help

        or

          python -m wordcloud --help
        """)
    sys.exit(_usage_hint)
|
|
|
|
import io
|
|
import re
|
|
import argparse
|
|
import wordcloud as wc
|
|
import numpy as np
|
|
from PIL import Image
|
|
|
|
from . import __version__
|
|
|
|
|
|
class FileType(object):
    """Factory for creating file object types.

    Port from argparse so we can support unicode file reading in Python2

    Instances of FileType are typically passed as type= arguments to the
    ArgumentParser add_argument() method.

    Keyword Arguments:
        - mode -- A string indicating how the file is to be opened. Accepts the
            same values as the builtin open() function.
        - bufsize -- The file's desired buffer size. Accepts the same values as
            the builtin open() function.

    Text-mode files are always opened as UTF-8; binary-mode files are opened
    without an encoding.
    """

    def __init__(self, mode='r', bufsize=-1):
        self._mode = mode
        self._bufsize = bufsize

    def __call__(self, string):
        """Turn a command-line argument into an open file object.

        Parameters:
            string -- the argument value; "-" selects a standard stream,
                anything else is treated as a file name.

        Returns:
            sys.stdin / sys.stdout (or their binary ``.buffer`` when the mode
            contains 'b') for "-", otherwise the file opened with io.open().

        Raises:
            ValueError -- "-" was given with a mode that is neither a read
                ('r') nor a write ('w') mode.
            argparse.ArgumentTypeError -- the named file could not be opened.
        """
        binary = 'b' in self._mode

        # the special argument "-" means sys.std{in,out}
        if string == '-':
            if 'r' in self._mode:
                # A binary reader must get the underlying byte stream, the
                # same way the write branch hands out sys.stdout.buffer.
                return sys.stdin.buffer if binary else sys.stdin
            elif 'w' in self._mode:
                return sys.stdout.buffer if binary else sys.stdout
            else:
                msg = 'argument "-" with mode %r' % self._mode
                raise ValueError(msg)

        # all other arguments are used as file names
        try:
            # io.open() rejects an encoding for binary files, so only text
            # mode gets the explicit UTF-8.
            encoding = None if binary else "UTF-8"
            return io.open(string, self._mode, self._bufsize, encoding=encoding)
        except IOError as e:
            message = "can't open '%s': %s"
            raise argparse.ArgumentTypeError(message % (string, e))

    def __repr__(self):
        # A bufsize left at its default of -1 is omitted from the repr.
        args = self._mode, self._bufsize
        args_str = ', '.join(repr(arg) for arg in args if arg != -1)
        return '%s(%s)' % (type(self).__name__, args_str)
|
|
|
|
|
|
class RegExpAction(argparse.Action):
    """argparse action that accepts a value only if it compiles as a regex.

    The original pattern string (not a compiled pattern object) is what gets
    stored on the namespace under ``dest``.
    """

    def __init__(self, option_strings, dest, **kwargs):
        super(RegExpAction, self).__init__(option_strings, dest, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        """Validate ``values`` as a regular expression and store it.

        Raises:
            argparse.ArgumentError -- ``values`` is rejected by re.compile().
        """
        try:
            # Compiled purely as a syntax check; the result is discarded.
            re.compile(values)
        except re.error as exc:
            reason = 'Invalid regular expression: ' + str(exc)
            raise argparse.ArgumentError(self, reason)
        else:
            setattr(namespace, self.dest, values)
|
|
|
|
|
|
def main(args, text, imagefile):
    """Generate a word cloud from ``text`` and write it out as a PNG.

    Parameters:
        args -- mapping of keyword arguments forwarded to wc.WordCloud().
        text -- the text the cloud is generated from.
        imagefile -- writable file object receiving the PNG; it is used as a
            context manager here, so it is closed once the image is saved.
    """
    cloud = wc.WordCloud(**args)
    cloud.generate(text)
    rendered = cloud.to_image()

    with imagefile:
        rendered.save(imagefile, format='png', optimize=True)
|
|
|
|
|
|
def make_parser():
    """Build the argparse parser for the word cloud command line.

    Options are declared in a table and registered in table order, so the
    order shown by ``--help`` follows the table from top to bottom.

    Returns:
        The configured argparse.ArgumentParser.
    """
    cli = argparse.ArgumentParser(
        description='A simple command line interface for wordcloud module.')

    # Help text shared by every option that takes a colour.
    option_table = [
        # --- input / output files ---
        ('--text', dict(
            metavar='file', type=FileType(), default='-',
            help='specify file of words to build the word cloud'
                 ' (default: stdin)')),
        ('--regexp', dict(
            metavar='regexp', default=None, action=RegExpAction,
            help='override the regular expression defining what constitutes'
                 ' a word')),
        ('--stopwords', dict(
            metavar='file', type=FileType(),
            help='specify file of stopwords (containing one word per line)'
                 ' to remove from the given text after parsing')),
        ('--imagefile', dict(
            metavar='file', type=FileType('wb'), default='-',
            help='file the completed PNG image should be written to'
                 ' (default: stdout)')),
        ('--fontfile', dict(
            metavar='path', dest='font_path',
            help='path to font file you wish to use'
                 ' (default: DroidSansMono)')),
        # --- masks and contours ---
        ('--mask', dict(
            metavar='file', type=argparse.FileType('rb'),
            help='mask to use for the image form')),
        ('--colormask', dict(
            metavar='file', type=argparse.FileType('rb'),
            help='color mask to use for image coloring')),
        ('--contour_width', dict(
            metavar='width', default=0, type=float, dest='contour_width',
            help='if greater than 0, draw mask contour (default: 0)')),
        ('--contour_color', dict(
            metavar='color', default='black', type=str, dest='contour_color',
            help='use given color as mask contour color -'
                 ' accepts any value from PIL.ImageColor.getcolor')),
        # --- layout ---
        ('--relative_scaling', dict(
            type=float, default=0, metavar='rs',
            help=' scaling of words by frequency (0 - 1)')),
        ('--margin', dict(
            type=int, default=2, metavar='width',
            help='spacing to leave around words')),
        ('--width', dict(
            type=int, default=400, metavar='width',
            help='define output image width')),
        ('--height', dict(
            type=int, default=200, metavar='height',
            help='define output image height')),
        # --- colours ---
        ('--color', dict(
            metavar='color',
            help='use given color as coloring for the image -'
                 ' accepts any value from PIL.ImageColor.getcolor')),
        ('--background', dict(
            metavar='color', default='black', type=str,
            dest='background_color',
            help='use given color as background color for the image -'
                 ' accepts any value from PIL.ImageColor.getcolor')),
        # --- tokenisation ---
        ('--no_collocations', dict(
            action='store_false', dest='collocations',
            help='do not add collocations (bigrams) to word cloud '
                 '(default: add unigrams and bigrams)')),
        ('--include_numbers', dict(
            action='store_true', dest='include_numbers',
            help='include numbers in wordcloud?')),
        ('--min_word_length', dict(
            type=int, default=0, metavar='min_word_length',
            dest='min_word_length',
            help='only include words with more than X letters')),
        # --- rendering ---
        ('--prefer_horizontal', dict(
            type=float, default=0.9, metavar='ratio',
            help='ratio of times to try horizontal fitting as opposed to'
                 ' vertical')),
        ('--scale', dict(
            type=float, default=1, metavar='scale',
            help='scaling between computation and drawing')),
        ('--colormap', dict(
            type=str, default='viridis', metavar='map',
            help='matplotlib colormap name')),
        ('--mode', dict(
            type=str, default='RGB', metavar='mode',
            help='use RGB or RGBA for transparent background')),
        ('--max_words', dict(
            type=int, default=200, metavar='N',
            help='maximum number of words')),
        ('--min_font_size', dict(
            type=int, default=4, metavar='size',
            help='smallest font size to use')),
        ('--max_font_size', dict(
            type=int, default=None, metavar='size',
            help='maximum font size for the largest word')),
        ('--font_step', dict(
            type=int, default=1, metavar='step',
            help='step size for the font')),
        ('--random_state', dict(
            type=int, default=None, metavar='seed',
            help='random seed')),
        ('--no_normalize_plurals', dict(
            action='store_false', dest='normalize_plurals',
            help='whether to remove trailing \'s\' from words')),
        ('--repeat', dict(
            action='store_true', dest='repeat',
            help='whether to repeat words and phrases')),
        # --- meta ---
        ('--version', dict(
            action='version',
            version='%(prog)s {version}'.format(version=__version__))),
    ]

    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    return cli
|
|
|
|
|
|
def parse_args(arguments):
    """Parse command-line arguments into the inputs expected by main().

    Parameters:
        arguments -- list of argument strings handed to the parser built by
            make_parser().

    Returns:
        A tuple ``(args, text, imagefile)``:
        args -- dict of the remaining parsed options, with 'stopwords'
            replaced by a set of stripped lines (when given), 'mask' replaced
            by a numpy array (when given) and a 'color_func' entry added;
        text -- full contents read from the --text file;
        imagefile -- the still-open file object from --imagefile.

    Raises:
        ValueError -- both --colormask and --color were supplied.
    """
    parser = make_parser()
    args = parser.parse_args(arguments)

    # The literal string 'None' on the command line stands for "no
    # background colour".
    if args.background_color == 'None':
        args.background_color = None

    if args.colormask and args.color:
        raise ValueError('specify either a color mask or a color function')

    args = vars(args)

    with args.pop('text') as f:
        text = f.read()

    if args['stopwords']:
        with args.pop('stopwords') as f:
            args['stopwords'] = {line.strip() for line in f}

    if args['mask']:
        # The pixels are copied into the numpy array inside the with-block,
        # so the handle argparse opened can be closed instead of leaking.
        with args.pop('mask') as mask_file:
            args['mask'] = np.array(Image.open(mask_file))

    color_func = wc.random_color_func
    colormask = args.pop('colormask')
    color = args.pop('color')
    if colormask:
        # Same as for the mask: copy the pixels, then release the handle.
        with colormask:
            image = np.array(Image.open(colormask))
        color_func = wc.ImageColorGenerator(image)
    if color:
        color_func = wc.get_single_color_func(color)
    args['color_func'] = color_func

    imagefile = args.pop('imagefile')

    return args, text, imagefile
|