"""Scramble a text by shuffling its words within each part of speech.

Reads text from standard input, tags every token with a part of speech
using NLTK, then rebuilds the text so that each position keeps its original
part-of-speech tag but receives a randomly chosen word that carried the
same tag somewhere in the input.
"""

import random
import re
import sys

# Tokens that start with one of these characters are attached directly to
# the preceding word instead of being separated from it by a space.
PUNCTUATION_RE = re.compile(r"[.,;:'!?]")


def shuffle_by_pos(tagged_words, rng=random):
    """Return the words of ``tagged_words`` shuffled within each POS tag.

    ``tagged_words`` is a sequence of ``(word, tag)`` pairs. The result is
    a list of the same length in which position ``i`` holds a word whose
    tag equals the tag at position ``i`` of the input; every input word is
    used exactly once. The input sequence is not modified.

    ``rng`` is any object with a ``shuffle(list)`` method; it defaults to
    the ``random`` module. Pass a seeded ``random.Random`` for
    reproducible output.
    """
    # Group the words by tag: each key is a POS tag holding a list of
    # every word of that type from the text.
    pos_table = {}
    for word, tag in tagged_words:
        pos_table.setdefault(tag, []).append(word)

    # Scramble the word lists.
    for words in pos_table.values():
        rng.shuffle(words)

    # Walk the original tag sequence, taking the last word from the
    # matching scrambled list each time.
    return [pos_table[tag].pop() for _, tag in tagged_words]


def join_words(words):
    """Join ``words`` into one string with a space before each word.

    The space is left out before any word that begins with one of the
    characters ``. , ; : ' ! ?``. Leading and trailing white space is
    stripped from the result.
    """
    pieces = []
    for word in words:
        # Leave out the space if it's punctuation.
        if not PUNCTUATION_RE.match(word):
            pieces.append(" ")
        # Accumulate the words.
        pieces.append(word)
    # Remove surrounding white space.
    return "".join(pieces).strip()


def main():
    """Read text from stdin and print its POS-scrambled version."""
    # Imported here so the pure helpers above can be imported and tested
    # without the third-party NLTK package installed.
    import nltk

    # Grab a file from standard input, dump it in a string.
    source_text = sys.stdin.read()

    # Use NLTK to make some guesses about each word's part of speech.
    token_text = nltk.word_tokenize(source_text)
    pos_text = nltk.pos_tag(token_text)

    # Rebuild the text from the scrambled words.
    output = join_words(shuffle_by_pos(pos_text))

    # The parenthesised form prints the same single value on Python 2
    # and Python 3.
    print(output)


if __name__ == "__main__":
    main()