NIM_E31221299/venv/Lib/site-packages/wordcloud/wordcloud_cli.py

258 lines
8.8 KiB
Python

# -*- coding: utf-8 -*-
"""Command-line tool interface to generate word clouds.
"""
from __future__ import absolute_import
import sys
import textwrap
if __name__ == '__main__':  # pragma: no cover
    # This module is not meant to be run as a script: exit immediately with
    # a message pointing at the supported entry points.
    _run_hint = textwrap.dedent(
        """
        To execute the CLI, instead consider running:
        wordcloud_cli --help
        or
        python -m wordcloud --help
        """)
    raise SystemExit(_run_hint)
import io
import re
import argparse
import wordcloud as wc
import numpy as np
from PIL import Image
from . import __version__
class FileType(object):
    """Factory for creating file object types.

    Port from argparse so we can support unicode file reading in Python2.

    Instances of FileType are typically passed as type= arguments to the
    ArgumentParser add_argument() method.

    Keyword Arguments:
        - mode -- A string indicating how the file is to be opened. Accepts the
            same values as the builtin open() function.
        - bufsize -- The file's desired buffer size. Accepts the same values as
            the builtin open() function.
    """

    def __init__(self, mode='r', bufsize=-1):
        self._mode = mode
        self._bufsize = bufsize

    def __call__(self, string):
        """Open ``string`` as a file, or map ``"-"`` to a standard stream.

        Text-mode files are opened as UTF-8; binary modes get no encoding.

        Raises:
            ValueError: if ``string`` is ``"-"`` and the mode contains neither
                ``'r'`` nor ``'w'``.
            argparse.ArgumentTypeError: if the file cannot be opened.
        """
        binary = 'b' in self._mode

        # the special argument "-" means sys.std{in,out}
        if string == '-':
            if 'r' in self._mode:
                # Binary readers need the underlying byte stream, mirroring
                # what is already done for stdout below.
                return sys.stdin.buffer if binary else sys.stdin
            elif 'w' in self._mode:
                return sys.stdout.buffer if binary else sys.stdout
            else:
                msg = 'argument "-" with mode %r' % self._mode
                raise ValueError(msg)

        # all other arguments are used as file names
        try:
            encoding = None if binary else "UTF-8"
            return io.open(string, self._mode, self._bufsize, encoding=encoding)
        except IOError as e:
            message = "can't open '%s': %s"
            raise argparse.ArgumentTypeError(message % (string, e))

    def __repr__(self):
        # A bufsize of -1 (the default) is left out of the representation.
        args = self._mode, self._bufsize
        args_str = ', '.join(repr(arg) for arg in args if arg != -1)
        return '%s(%s)' % (type(self).__name__, args_str)
class RegExpAction(argparse.Action):
    """argparse action that stores its value only if it compiles as a regex.

    The value is stored on the namespace as the original pattern string,
    not as a compiled pattern object.
    """

    def __init__(self, option_strings, dest, **kwargs):
        super(RegExpAction, self).__init__(
            option_strings=option_strings, dest=dest, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        """Validate ``values`` as a regular expression and store it.

        Raises:
            argparse.ArgumentError: if ``values`` is not a valid pattern.
        """
        try:
            re.compile(values)
        except re.error as err:
            reason = 'Invalid regular expression: ' + str(err)
            raise argparse.ArgumentError(self, reason)
        else:
            setattr(namespace, self.dest, values)
def main(args, text, imagefile):
    """Generate a word cloud from ``text`` and write it out as a PNG.

    Arguments:
        - args -- mapping of keyword arguments forwarded to wc.WordCloud.
        - text -- the text the word cloud is generated from.
        - imagefile -- writable file object receiving the PNG data; it is
            used as a context manager around the save call.
    """
    png_options = {'format': 'png', 'optimize': True}

    cloud = wc.WordCloud(**args)
    cloud.generate(text)
    rendered = cloud.to_image()

    with imagefile:
        rendered.save(imagefile, **png_options)
def make_parser():
    """Build the argument parser for the wordcloud command line interface.

    Most options are stored under a destination name that parse_args()
    later forwards as a keyword argument to wc.WordCloud; ``text``,
    ``imagefile``, ``colormask`` and ``color`` are consumed by parse_args()
    itself.

    Returns:
        argparse.ArgumentParser: the fully configured parser.
    """
    description = 'A simple command line interface for wordcloud module.'
    parser = argparse.ArgumentParser(description=description)
    # String defaults are run through ``type`` by argparse, so the '-'
    # defaults below resolve to the standard streams.
    parser.add_argument(
        '--text', metavar='file', type=FileType(), default='-',
        help='specify file of words to build the word cloud (default: stdin)')
    parser.add_argument(
        '--regexp', metavar='regexp', default=None, action=RegExpAction,
        help='override the regular expression defining what constitutes a word')
    parser.add_argument(
        '--stopwords', metavar='file', type=FileType(),
        help='specify file of stopwords (containing one word per line)'
             ' to remove from the given text after parsing')
    parser.add_argument(
        '--imagefile', metavar='file', type=FileType('wb'),
        default='-',
        help='file the completed PNG image should be written to'
             ' (default: stdout)')
    parser.add_argument(
        '--fontfile', metavar='path', dest='font_path',
        help='path to font file you wish to use (default: DroidSansMono)')
    parser.add_argument(
        '--mask', metavar='file', type=argparse.FileType('rb'),
        help='mask to use for the image form')
    parser.add_argument(
        '--colormask', metavar='file', type=argparse.FileType('rb'),
        help='color mask to use for image coloring')
    parser.add_argument(
        '--contour_width', metavar='width', default=0, type=float,
        dest='contour_width',
        help='if greater than 0, draw mask contour (default: 0)')
    parser.add_argument(
        '--contour_color', metavar='color', default='black', type=str,
        dest='contour_color',
        help='use given color as mask contour color -'
             ' accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '--relative_scaling', type=float, default=0,
        metavar='rs', help='scaling of words by frequency (0 - 1)')
    parser.add_argument(
        '--margin', type=int, default=2,
        metavar='width', help='spacing to leave around words')
    parser.add_argument(
        '--width', type=int, default=400,
        metavar='width', help='define output image width')
    parser.add_argument(
        '--height', type=int, default=200,
        metavar='height', help='define output image height')
    parser.add_argument(
        '--color', metavar='color',
        help='use given color as coloring for the image -'
             ' accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '--background', metavar='color', default='black', type=str,
        dest='background_color',
        help='use given color as background color for the image -'
             ' accepts any value from PIL.ImageColor.getcolor')
    parser.add_argument(
        '--no_collocations', action='store_false', dest='collocations',
        help='do not add collocations (bigrams) to word cloud '
             '(default: add unigrams and bigrams)')
    parser.add_argument(
        '--include_numbers',
        action='store_true',
        dest='include_numbers',
        help='include numbers in wordcloud?')
    parser.add_argument(
        '--min_word_length',
        type=int,
        default=0,
        metavar='min_word_length',
        dest='min_word_length',
        help='only include words with more than X letters')
    parser.add_argument(
        '--prefer_horizontal',
        type=float, default=.9, metavar='ratio',
        help='ratio of times to try horizontal fitting as opposed to vertical')
    parser.add_argument(
        '--scale',
        type=float, default=1, metavar='scale',
        help='scaling between computation and drawing')
    parser.add_argument(
        '--colormap',
        type=str, default='viridis', metavar='map',
        help='matplotlib colormap name')
    parser.add_argument(
        '--mode',
        type=str, default='RGB', metavar='mode',
        help='use RGB or RGBA for transparent background')
    parser.add_argument(
        '--max_words',
        type=int, default=200, metavar='N',
        help='maximum number of words')
    parser.add_argument(
        '--min_font_size',
        type=int, default=4, metavar='size',
        help='smallest font size to use')
    parser.add_argument(
        '--max_font_size',
        type=int, default=None, metavar='size',
        help='maximum font size for the largest word')
    parser.add_argument(
        '--font_step',
        type=int, default=1, metavar='step',
        help='step size for the font')
    parser.add_argument(
        '--random_state',
        type=int, default=None, metavar='seed',
        help='random seed')
    # store_false: normalize_plurals defaults to True and this flag turns
    # it off, so the help text has to describe the disabling behaviour.
    parser.add_argument(
        '--no_normalize_plurals',
        action='store_false',
        dest='normalize_plurals',
        help='do not remove trailing \'s\' from words '
             '(default: plurals are normalized)')
    parser.add_argument(
        '--repeat',
        action='store_true',
        dest='repeat',
        help='whether to repeat words and phrases')
    parser.add_argument(
        '--version', action='version',
        version='%(prog)s {version}'.format(version=__version__))
    return parser
def parse_args(arguments):
    """Parse CLI arguments into the inputs expected by main().

    Arguments:
        - arguments -- list of command line argument strings.

    Returns:
        A tuple ``(args, text, imagefile)``: ``args`` is a dict of the
        remaining options plus a ``color_func`` entry, ``text`` is the
        content read from the ``--text`` file, and ``imagefile`` is the
        opened output file object.

    Raises:
        ValueError: if both ``--colormask`` and ``--color`` are given.
    """
    # prog = 'python wordcloud_cli.py'
    parser = make_parser()
    args = parser.parse_args(arguments)

    # The literal string 'None' on the command line selects no background.
    if args.background_color == 'None':
        args.background_color = None

    if args.colormask and args.color:
        raise ValueError('specify either a color mask or a color function')

    args = vars(args)

    with args.pop('text') as f:
        text = f.read()

    if args['stopwords']:
        with args.pop('stopwords') as f:
            args['stopwords'] = {line.strip() for line in f}

    if args['mask']:
        # np.array() copies the pixel data, so the file handle opened by
        # argparse can be closed as soon as the array exists.
        with args.pop('mask') as mask_file:
            args['mask'] = np.array(Image.open(mask_file))

    color_func = wc.random_color_func
    colormask = args.pop('colormask')
    color = args.pop('color')
    if colormask:
        # Same as for the mask: load the pixels, then release the handle.
        with colormask:
            image = np.array(Image.open(colormask))
        color_func = wc.ImageColorGenerator(image)
    if color:
        color_func = wc.get_single_color_func(color)
    args['color_func'] = color_func

    imagefile = args.pop('imagefile')
    return args, text, imagefile