Recast text data by tokenising it.
Tokenisation supported: word tokenisation, sentence tokenisation. Parameters: package: string ('nltk', 'spacy'), default='nltk'; method: string ('word', 'sentence'), default=None; verbose: int (0, 1, -1), default=0. Examples: >>> # method='word' >>> from swachhdata.text import TokenisationRecast >>> text = 'Grabbing her umbrella, Kate raced out of the house. Confused by her sister’s sudden change in mood, Jill stayed quiet.' >>> rec = TokenisationRecast(package='nltk', method='word') >>> rec.setup(text) >>> rec.recast() ['Grabbing', 'her', 'umbrella', ',', 'Kate', 'raced', 'out', 'of', 'the', 'house', '.', 'Confused', 'by', 'her', 'sister', '’', 's', 'sudden', 'change', 'in', 'mood', ',', 'Jill', 'stayed', 'quiet', '.'] >>> # OR >>> rec.setup_recast(text) ['Grabbing', 'her', 'umbrella', ',', 'Kate', 'raced', 'out', 'of', 'the', 'house', '.', 'Confused', 'by', 'her', 'sister', '’', 's', 'sudden', 'change', 'in', 'mood', ',', 'Jill', 'stayed', 'quiet', '.'] >>> >>> # method='sentence' >>> from swachhdata.text import TokenisationRecast >>> text = 'Grabbing her umbrella, Kate raced out of the house. Confused by her sister’s sudden change in mood, Jill stayed quiet.' >>> rec = TokenisationRecast(package='nltk', method='sentence') >>> rec.setup(text) >>> rec.recast() ['Grabbing her umbrella, Kate raced out of the house.', 'Confused by her sister’s sudden change in mood, Jill stayed quiet.'] >>> # OR >>> rec.setup_recast(text) ['Grabbing her umbrella, Kate raced out of the house.', 'Confused by her sister’s sudden change in mood, Jill stayed quiet.']
word tokenisation
sentence tokenisation
package: string ('nltk', 'spacy'), default='nltk'
method: string ('word', 'sentence'), default=None
verbose: int (0, 1, -1), default=0
>>> # method='word'
>>> from swachhdata.text import TokenisationRecast
>>> text = 'Grabbing her umbrella, Kate raced out of the house. Confused by her sister’s sudden change in mood, Jill stayed quiet.'
>>> rec = TokenisationRecast(package='nltk', method='word')
>>> rec.setup(text)
>>> rec.recast()
['Grabbing', 'her', 'umbrella', ',', 'Kate', 'raced', 'out', 'of', 'the', 'house', '.', 'Confused', 'by', 'her', 'sister', '’', 's', 'sudden', 'change', 'in', 'mood', ',', 'Jill', 'stayed', 'quiet', '.']
>>> # OR
>>> rec.setup_recast(text)
['Grabbing', 'her', 'umbrella', ',', 'Kate', 'raced', 'out', 'of', 'the', 'house', '.', 'Confused', 'by', 'her', 'sister', '’', 's', 'sudden', 'change', 'in', 'mood', ',', 'Jill', 'stayed', 'quiet', '.']
>>>
>>> # method='sentence'
>>> from swachhdata.text import TokenisationRecast
>>> text = 'Grabbing her umbrella, Kate raced out of the house. Confused by her sister’s sudden change in mood, Jill stayed quiet.'
>>> rec = TokenisationRecast(package='nltk', method='sentence')
>>> rec.setup(text)
>>> rec.recast()
['Grabbing her umbrella, Kate raced out of the house.', 'Confused by her sister’s sudden change in mood, Jill stayed quiet.']
>>> # OR
>>> rec.setup_recast(text)
['Grabbing her umbrella, Kate raced out of the house.', 'Confused by her sister’s sudden change in mood, Jill stayed quiet.']