Solve InterviewCake's word-cloud problem
Write a function to count the frequency of words in a sentence. Ignore casing for words; ignore punctuation.
This commit is contained in:
		
							parent
							
								
									ef2ce90aa7
								
							
						
					
					
						commit
						e4cdb5daed
					
				
					 2 changed files with 80 additions and 1 deletions
				
			
		
							
								
								
									
										79
									
								
								scratch/deepmind/part_two/word-cloud.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										79
									
								
								scratch/deepmind/part_two/word-cloud.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,79 @@ | |||
| import unittest | ||||
| import re | ||||
| from collections import Counter | ||||
| 
 | ||||
| 
 | ||||
class WordCloudData(object):
    """Case-insensitive word-frequency table built from a sentence.

    After construction, ``words_to_counts`` is a plain ``dict`` mapping each
    lower-cased word to the number of times it occurs in the input.

    Normalisation rules:
      * ``...`` and a space-delimited dash (`` - ``) are word separators.
      * The characters ``, . ! ? ; :`` are removed.
      * Hyphens and apostrophes inside a word are kept
        (``mille-feuille``, ``allie's``).
    """

    def __init__(self, x):
        """Count the words in the sentence ``x`` (a string)."""
        # Separators must become spaces *before* punctuation is stripped;
        # otherwise 'Mmm...mmm' would collapse into the single word 'Mmmmmm'.
        text = x.replace('...', ' ').replace(' - ', ' ')
        text = ''.join(c for c in text if c not in ',.!?;:')
        # str.split() with no argument splits on runs of whitespace and never
        # yields empty strings, so empty input, leading/trailing whitespace
        # and a trailing ellipsis do not produce a bogus '' entry.
        self.words_to_counts = dict(
            Counter(word.lower() for word in text.split()))
| 
 | ||||
| 
 | ||||
| # Tests | ||||
class Test(unittest.TestCase):
    """Unit tests for the word counts produced by WordCloudData."""

    def check_counts(self, sentence, expected):
        """Build a WordCloudData from `sentence` and compare its counts.

        Asserts that the resulting `words_to_counts` equals `expected`.
        """
        actual = WordCloudData(sentence).words_to_counts
        self.assertEqual(actual, expected)

    def test_simple_sentence(self):
        self.check_counts(
            'I like cake',
            {'i': 1, 'like': 1, 'cake': 1},
        )

    def test_longer_sentence(self):
        self.check_counts(
            'Chocolate cake for dinner and pound cake for dessert',
            {
                'and': 1,
                'pound': 1,
                'for': 2,
                'dessert': 1,
                'chocolate': 1,
                'dinner': 1,
                'cake': 2,
            },
        )

    def test_punctuation(self):
        self.check_counts(
            'Strawberry short cake? Yum!',
            {'cake': 1, 'strawberry': 1, 'short': 1, 'yum': 1},
        )

    def test_hyphenated_words(self):
        self.check_counts(
            'Dessert - mille-feuille cake',
            {'cake': 1, 'dessert': 1, 'mille-feuille': 1},
        )

    def test_ellipses_between_words(self):
        self.check_counts(
            'Mmm...mmm...decisions...decisions',
            {'mmm': 2, 'decisions': 2},
        )

    def test_apostrophes(self):
        self.check_counts(
            "Allie's Bakery: Sasha's Cakes",
            {"bakery": 1, "cakes": 1, "allie's": 1, "sasha's": 1},
        )
| 
 | ||||
| 
 | ||||
# Run the suite only when this file is executed as a script. unittest.main()
# raises SystemExit when it finishes, so calling it unguarded would run the
# tests and terminate any process that merely imports this module.
if __name__ == '__main__':
    unittest.main(verbosity=2)
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue