Implement the Levenshtein "edit distance" algorithm
This is the mother of dynamic programming algorithms, in my opinion. It computes the minimal "edit distance" between two input strings, where an edit is one of: (1) inserting a character into `a`; (2) deleting a character from `a`; (3) substituting a character in `a` with a character from `b`. It took me a while to grok the algorithm, but I implemented this from my understanding of something that I read ~3 nights prior, so I must've understood what I read. Good news!
This commit is contained in:
		
							parent
							
								
									f652ea0be6
								
							
						
					
					
						commit
						847aad2a14
					
				
					 1 changed files with 47 additions and 0 deletions
				
			
		
							
								
								
									
										47
									
								
								scratch/facebook/edit-distance.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								scratch/facebook/edit-distance.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,47 @@
 | 
				
			||||||
 | 
					def print_grid(grid):
					    """Print `grid` one row per line, with the cells of a row separated by spaces."""
					    # Each cell is stringified so rows of ints (or anything else) can be joined.
					    lines = (" ".join(map(str, cells)) for cells in grid)
					    return print("\n".join(lines))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def edit_distance(a, b, *, show_grid=True):
					    """
					    Compute the "edit distance" to transform string `a` into string `b`.

					    An edit is one of: inserting a character into `a`, deleting a character
					    from `a`, or substituting a character in `a` with a character from `b`.
					    Each edit costs 1; matching characters cost nothing.

					    Args:
					        a: The source string.
					        b: The target string.
					        show_grid: When true (the default), print the filled-in table via
					            `print_grid` before returning.

					    Returns:
					        The minimal number of edits needed to turn `a` into `b`.
					    """
					    height = len(a) + 1
					    width = len(b) + 1
					    # grid[row][col] is the distance between the first `row` characters of `a`
					    # and the first `col` characters of `b`.
					    grid = [[0] * width for _ in range(height)]
					    # Top row: building a prefix of `b` from the empty string takes `col` insertions.
					    for col in range(width):
					        grid[0][col] = col
					    # Left column: reducing a prefix of `a` to the empty string takes `row` deletions.
					    for row in range(height):
					        grid[row][0] = row
					    for row in range(1, height):
					        for col in range(1, width):
					            # Index the last character of each prefix directly instead of
					            # slicing the prefix, which would copy it for every cell.
					            if a[row - 1] == b[col - 1]:
					                # Last characters are the same: no extra edit is needed.
					                grid[row][col] = grid[row - 1][col - 1]
					            else:
					                grid[row][col] = 1 + min(
					                    grid[row - 1][col - 1],  # substitution
					                    grid[row - 1][col],  # deletion
					                    grid[row][col - 1],  # insertion
					                )
					    if show_grid:
					        print_grid(grid)
					    return grid[-1][-1]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Smoke test: "pizza" -> "pisa" takes one deletion and one substitution.
					result = edit_distance("pizza", "pisa")
					print(result)
					assert result == 2
					print("Success!")
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue