1
2
3 """Diff Match and Patch
4
5 Copyright 2006 Google Inc.
6 http://code.google.com/p/google-diff-match-patch/
7
8 Licensed under the Apache License, Version 2.0 (the "License");
9 you may not use this file except in compliance with the License.
10 You may obtain a copy of the License at
11
12 http://www.apache.org/licenses/LICENSE-2.0
13
14 Unless required by applicable law or agreed to in writing, software
15 distributed under the License is distributed on an "AS IS" BASIS,
16 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 See the License for the specific language governing permissions and
18 limitations under the License.
19 """
20
21 """Functions for diff, match and patch.
22
23 Computes the difference between two texts to create a patch.
24 Applies the patch onto another text, allowing for errors.
25 """
26
27 __author__ = 'fraser@google.com (Neil Fraser)'
28
29 import math
30 import time
31 import urllib
32 import re
33
35 """Class containing the diff, match and patch methods.
36
37 Also contains the behaviour settings.
38 """
39
41 """Inits a diff_match_patch object with default settings.
42 Redefine these in your program to override the defaults.
43 """
44
45
46 self.Diff_Timeout = 1.0
47
48 self.Diff_EditCost = 4
49
50
51 self.Diff_DualThreshold = 32
52
53 self.Match_Threshold = 0.5
54
55
56
57 self.Match_Distance = 1000
58
59
60
61
62 self.Patch_DeleteThreshold = 0.5
63
64 self.Patch_Margin = 4
65
66
67
68
69
70 self.Match_MaxBits = 32
71
72
73
74
75
76
77 DIFF_DELETE = -1
78 DIFF_INSERT = 1
79 DIFF_EQUAL = 0
80
81 - def diff_main(self, text1, text2, checklines=True):
82 """Find the differences between two texts. Simplifies the problem by
83 stripping any common prefix or suffix off the texts before diffing.
84
85 Args:
86 text1: Old string to be diffed.
87 text2: New string to be diffed.
88 checklines: Optional speedup flag. If present and false, then don't run
89 a line-level diff first to identify the changed areas.
90 Defaults to true, which does a faster, slightly less optimal diff.
91
92 Returns:
93 Array of changes.
94 """
95
96
97 if text1 == None or text2 == None:
98 raise ValueError("Null inputs. (diff_main)")
99
100
101 if text1 == text2:
102 return [(self.DIFF_EQUAL, text1)]
103
104
105 commonlength = self.diff_commonPrefix(text1, text2)
106 commonprefix = text1[:commonlength]
107 text1 = text1[commonlength:]
108 text2 = text2[commonlength:]
109
110
111 commonlength = self.diff_commonSuffix(text1, text2)
112 if commonlength == 0:
113 commonsuffix = ''
114 else:
115 commonsuffix = text1[-commonlength:]
116 text1 = text1[:-commonlength]
117 text2 = text2[:-commonlength]
118
119
120 diffs = self.diff_compute(text1, text2, checklines)
121
122
123 if commonprefix:
124 diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]
125 if commonsuffix:
126 diffs.append((self.DIFF_EQUAL, commonsuffix))
127 self.diff_cleanupMerge(diffs)
128 return diffs
129
131 """Find the differences between two texts. Assumes that the texts do not
132 have any common prefix or suffix.
133
134 Args:
135 text1: Old string to be diffed.
136 text2: New string to be diffed.
137 checklines: Speedup flag. If false, then don't run a line-level diff
138 first to identify the changed areas.
139 If true, then run a faster, slightly less optimal diff.
140
141 Returns:
142 Array of changes.
143 """
144 if not text1:
145
146 return [(self.DIFF_INSERT, text2)]
147
148 if not text2:
149
150 return [(self.DIFF_DELETE, text1)]
151
152 if len(text1) > len(text2):
153 (longtext, shorttext) = (text1, text2)
154 else:
155 (shorttext, longtext) = (text1, text2)
156 i = longtext.find(shorttext)
157 if i != -1:
158
159 diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext),
160 (self.DIFF_INSERT, longtext[i + len(shorttext):])]
161
162 if len(text1) > len(text2):
163 diffs[0] = (self.DIFF_DELETE, diffs[0][1])
164 diffs[2] = (self.DIFF_DELETE, diffs[2][1])
165 return diffs
166 longtext = shorttext = None
167
168
169 hm = self.diff_halfMatch(text1, text2)
170 if hm:
171
172 (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
173
174 diffs_a = self.diff_main(text1_a, text2_a, checklines)
175 diffs_b = self.diff_main(text1_b, text2_b, checklines)
176
177 return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b
178
179
180 if checklines and (len(text1) < 100 or len(text2) < 100):
181 checklines = False
182 if checklines:
183
184 (text1, text2, linearray) = self.diff_linesToChars(text1, text2)
185
186 diffs = self.diff_map(text1, text2)
187 if not diffs:
188 diffs = [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
189 if checklines:
190
191 self.diff_charsToLines(diffs, linearray)
192
193 self.diff_cleanupSemantic(diffs)
194
195
196
197 diffs.append((self.DIFF_EQUAL, ''))
198 pointer = 0
199 count_delete = 0
200 count_insert = 0
201 text_delete = ''
202 text_insert = ''
203 while pointer < len(diffs):
204 if diffs[pointer][0] == self.DIFF_INSERT:
205 count_insert += 1
206 text_insert += diffs[pointer][1]
207 elif diffs[pointer][0] == self.DIFF_DELETE:
208 count_delete += 1
209 text_delete += diffs[pointer][1]
210 elif diffs[pointer][0] == self.DIFF_EQUAL:
211
212 if count_delete >= 1 and count_insert >= 1:
213
214 a = self.diff_main(text_delete, text_insert, False)
215 diffs[pointer - count_delete - count_insert : pointer] = a
216 pointer = pointer - count_delete - count_insert + len(a)
217 count_insert = 0
218 count_delete = 0
219 text_delete = ''
220 text_insert = ''
221
222 pointer += 1
223
224 diffs.pop()
225 return diffs
226
228 """Split two texts into an array of strings. Reduce the texts to a string
229 of hashes where each Unicode character represents one line.
230
231 Args:
232 text1: First string.
233 text2: Second string.
234
235 Returns:
236 Three element tuple, containing the encoded text1, the encoded text2 and
237 the array of unique strings. The zeroth element of the array of unique
238 strings is intentionally blank.
239 """
240 lineArray = []
241 lineHash = {}
242
243
244
245 lineArray.append('')
246
247 def diff_linesToCharsMunge(text):
248 """Split a text into an array of strings. Reduce the texts to a string
249 of hashes where each Unicode character represents one line.
250 Modifies linearray and linehash through being a closure.
251
252 Args:
253 text: String to encode.
254
255 Returns:
256 Encoded string.
257 """
258 chars = []
259
260
261
262 lineStart = 0
263 lineEnd = -1
264 while lineEnd < len(text) - 1:
265 lineEnd = text.find('\n', lineStart)
266 if lineEnd == -1:
267 lineEnd = len(text) - 1
268 line = text[lineStart:lineEnd + 1]
269 lineStart = lineEnd + 1
270
271 if line in lineHash:
272 chars.append(unichr(lineHash[line]))
273 else:
274 lineArray.append(line)
275 lineHash[line] = len(lineArray) - 1
276 chars.append(unichr(len(lineArray) - 1))
277 return "".join(chars)
278
279 chars1 = diff_linesToCharsMunge(text1)
280 chars2 = diff_linesToCharsMunge(text2)
281 return (chars1, chars2, lineArray)
282
284 """Rehydrate the text in a diff from a string of line hashes to real lines
285 of text.
286
287 Args:
288 diffs: Array of diff tuples.
289 lineArray: Array of unique strings.
290 """
291 for x in xrange(len(diffs)):
292 text = []
293 for char in diffs[x][1]:
294 text.append(lineArray[ord(char)])
295 diffs[x] = (diffs[x][0], "".join(text))
296
298 """Explore the intersection points between the two texts.
299
300 Args:
301 text1: Old string to be diffed.
302 text2: New string to be diffed.
303
304 Returns:
305 Array of diff tuples or None if no diff available.
306 """
307
308
309 s_end = time.time() + self.Diff_Timeout
310
311 text1_length = len(text1)
312 text2_length = len(text2)
313 max_d = text1_length + text2_length - 1
314 doubleEnd = self.Diff_DualThreshold * 2 < max_d
315
316
317
318
319 v_map1 = []
320 v_map2 = []
321 v1 = {}
322 v2 = {}
323 v1[1] = 0
324 v2[1] = 0
325 footsteps = {}
326 done = False
327
328
329 front = (text1_length + text2_length) % 2
330 for d in xrange(max_d):
331
332 if self.Diff_Timeout > 0 and time.time() > s_end:
333 return None
334
335
336 v_map1.append({})
337 for k in xrange(-d, d + 1, 2):
338 if k == -d or k != d and v1[k - 1] < v1[k + 1]:
339 x = v1[k + 1]
340 else:
341 x = v1[k - 1] + 1
342 y = x - k
343 if doubleEnd:
344 footstep = str((x << 32) + y)
345 if front and footstep in footsteps:
346 done = True
347 if not front:
348 footsteps[footstep] = d
349
350 while (not done and x < text1_length and y < text2_length and
351 text1[x] == text2[y]):
352 x += 1
353 y += 1
354 if doubleEnd:
355 footstep = str((x << 32) + y)
356 if front and footstep in footsteps:
357 done = True
358 if not front:
359 footsteps[footstep] = d
360
361 v1[k] = x
362 v_map1[d][(x << 32) + y] = True
363 if x == text1_length and y == text2_length:
364
365 return self.diff_path1(v_map1, text1, text2)
366 elif done:
367
368 v_map2 = v_map2[:footsteps[footstep] + 1]
369 a = self.diff_path1(v_map1, text1[:x], text2[:y])
370 b = self.diff_path2(v_map2, text1[x:], text2[y:])
371 return a + b
372
373 if doubleEnd:
374
375 v_map2.append({})
376 for k in xrange(-d, d + 1, 2):
377 if k == -d or k != d and v2[k - 1] < v2[k + 1]:
378 x = v2[k + 1]
379 else:
380 x = v2[k - 1] + 1
381 y = x - k
382 footstep = str((text1_length - x << 32) + text2_length - y)
383 if not front and footstep in footsteps:
384 done = True
385 if front:
386 footsteps[footstep] = d
387 while (not done and x < text1_length and y < text2_length and
388 text1[-x - 1] == text2[-y - 1]):
389 x += 1
390 y += 1
391 footstep = str((text1_length - x << 32) + text2_length - y)
392 if not front and footstep in footsteps:
393 done = True
394 if front:
395 footsteps[footstep] = d
396
397 v2[k] = x
398 v_map2[d][(x << 32) + y] = True
399 if done:
400
401 v_map1 = v_map1[:footsteps[footstep] + 1]
402 a = self.diff_path1(v_map1, text1[:text1_length - x],
403 text2[:text2_length - y])
404 b = self.diff_path2(v_map2, text1[text1_length - x:],
405 text2[text2_length - y:])
406 return a + b
407
408
409 return None
410
412 """Work from the middle back to the start to determine the path.
413
414 Args:
415 v_map: Array of paths.
416 text1: Old string fragment to be diffed.
417 text2: New string fragment to be diffed.
418
419 Returns:
420 Array of diff tuples.
421 """
422 path = []
423 x = len(text1)
424 y = len(text2)
425 last_op = None
426 for d in xrange(len(v_map) - 2, -1, -1):
427 while True:
428 if (x - 1 << 32) + y in v_map[d]:
429 x -= 1
430 if last_op == self.DIFF_DELETE:
431 path[0] = (self.DIFF_DELETE, text1[x] + path[0][1])
432 else:
433 path[:0] = [(self.DIFF_DELETE, text1[x])]
434 last_op = self.DIFF_DELETE
435 break
436 elif (x << 32) + y - 1 in v_map[d]:
437 y -= 1
438 if last_op == self.DIFF_INSERT:
439 path[0] = (self.DIFF_INSERT, text2[y] + path[0][1])
440 else:
441 path[:0] = [(self.DIFF_INSERT, text2[y])]
442 last_op = self.DIFF_INSERT
443 break
444 else:
445 x -= 1
446 y -= 1
447 assert text1[x] == text2[y], ("No diagonal. " +
448 "Can't happen. (diff_path1)")
449 if last_op == self.DIFF_EQUAL:
450 path[0] = (self.DIFF_EQUAL, text1[x] + path[0][1])
451 else:
452 path[:0] = [(self.DIFF_EQUAL, text1[x])]
453 last_op = self.DIFF_EQUAL
454 return path
455
457 """Work from the middle back to the end to determine the path.
458
459 Args:
460 v_map: Array of paths.
461 text1: Old string fragment to be diffed.
462 text2: New string fragment to be diffed.
463
464 Returns:
465 Array of diff tuples.
466 """
467 path = []
468 x = len(text1)
469 y = len(text2)
470 last_op = None
471 for d in xrange(len(v_map) - 2, -1, -1):
472 while True:
473 if (x - 1 << 32) + y in v_map[d]:
474 x -= 1
475 if last_op == self.DIFF_DELETE:
476 path[-1] = (self.DIFF_DELETE, path[-1][1] + text1[-x - 1])
477 else:
478 path.append((self.DIFF_DELETE, text1[-x - 1]))
479 last_op = self.DIFF_DELETE
480 break
481 elif (x << 32) + y - 1 in v_map[d]:
482 y -= 1
483 if last_op == self.DIFF_INSERT:
484 path[-1] = (self.DIFF_INSERT, path[-1][1] + text2[-y - 1])
485 else:
486 path.append((self.DIFF_INSERT, text2[-y - 1]))
487 last_op = self.DIFF_INSERT
488 break
489 else:
490 x -= 1
491 y -= 1
492 assert text1[-x - 1] == text2[-y - 1], ("No diagonal. " +
493 "Can't happen. (diff_path2)")
494 if last_op == self.DIFF_EQUAL:
495 path[-1] = (self.DIFF_EQUAL, path[-1][1] + text1[-x - 1])
496 else:
497 path.append((self.DIFF_EQUAL, text1[-x - 1]))
498 last_op = self.DIFF_EQUAL
499 return path
500
502 """Determine the common prefix of two strings.
503
504 Args:
505 text1: First string.
506 text2: Second string.
507
508 Returns:
509 The number of characters common to the start of each string.
510 """
511
512 if not text1 or not text2 or text1[0] != text2[0]:
513 return 0
514
515
516 pointermin = 0
517 pointermax = min(len(text1), len(text2))
518 pointermid = pointermax
519 pointerstart = 0
520 while pointermin < pointermid:
521 if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:
522 pointermin = pointermid
523 pointerstart = pointermin
524 else:
525 pointermax = pointermid
526 pointermid = int((pointermax - pointermin) / 2 + pointermin)
527 return pointermid
528
530 """Determine the common suffix of two strings.
531
532 Args:
533 text1: First string.
534 text2: Second string.
535
536 Returns:
537 The number of characters common to the end of each string.
538 """
539
540 if not text1 or not text2 or text1[-1] != text2[-1]:
541 return 0
542
543
544 pointermin = 0
545 pointermax = min(len(text1), len(text2))
546 pointermid = pointermax
547 pointerend = 0
548 while pointermin < pointermid:
549 if (text1[-pointermid:len(text1) - pointerend] ==
550 text2[-pointermid:len(text2) - pointerend]):
551 pointermin = pointermid
552 pointerend = pointermin
553 else:
554 pointermax = pointermid
555 pointermid = int((pointermax - pointermin) / 2 + pointermin)
556 return pointermid
557
559 """Do the two texts share a substring which is at least half the length of
560 the longer text?
561
562 Args:
563 text1: First string.
564 text2: Second string.
565
566 Returns:
567 Five element Array, containing the prefix of text1, the suffix of text1,
568 the prefix of text2, the suffix of text2 and the common middle. Or None
569 if there was no match.
570 """
571 if len(text1) > len(text2):
572 (longtext, shorttext) = (text1, text2)
573 else:
574 (shorttext, longtext) = (text1, text2)
575 if len(longtext) < 10 or len(shorttext) < 1:
576 return None
577
578 def diff_halfMatchI(longtext, shorttext, i):
579 """Does a substring of shorttext exist within longtext such that the
580 substring is at least half the length of longtext?
581 Closure, but does not reference any external variables.
582
583 Args:
584 longtext: Longer string.
585 shorttext: Shorter string.
586 i: Start index of quarter length substring within longtext.
587
588 Returns:
589 Five element Array, containing the prefix of longtext, the suffix of
590 longtext, the prefix of shorttext, the suffix of shorttext and the
591 common middle. Or None if there was no match.
592 """
593 seed = longtext[i:i + len(longtext) / 4]
594 best_common = ''
595 j = shorttext.find(seed)
596 while j != -1:
597 prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])
598 suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])
599 if len(best_common) < suffixLength + prefixLength:
600 best_common = (shorttext[j - suffixLength:j] +
601 shorttext[j:j + prefixLength])
602 best_longtext_a = longtext[:i - suffixLength]
603 best_longtext_b = longtext[i + prefixLength:]
604 best_shorttext_a = shorttext[:j - suffixLength]
605 best_shorttext_b = shorttext[j + prefixLength:]
606 j = shorttext.find(seed, j + 1)
607
608 if len(best_common) >= len(longtext) / 2:
609 return (best_longtext_a, best_longtext_b,
610 best_shorttext_a, best_shorttext_b, best_common)
611 else:
612 return None
613
614
615 hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) / 4)
616
617 hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) / 2)
618 if not hm1 and not hm2:
619 return None
620 elif not hm2:
621 hm = hm1
622 elif not hm1:
623 hm = hm2
624 else:
625
626 if len(hm1[4]) > len(hm2[4]):
627 hm = hm1
628 else:
629 hm = hm2
630
631
632 if len(text1) > len(text2):
633 (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
634 else:
635 (text2_a, text2_b, text1_a, text1_b, mid_common) = hm
636 return (text1_a, text1_b, text2_a, text2_b, mid_common)
637
639 """Reduce the number of edits by eliminating semantically trivial
640 equalities.
641
642 Args:
643 diffs: Array of diff tuples.
644 """
645 changes = False
646 equalities = []
647 lastequality = None
648 pointer = 0
649 length_changes1 = 0
650 length_changes2 = 0
651 while pointer < len(diffs):
652 if diffs[pointer][0] == self.DIFF_EQUAL:
653 equalities.append(pointer)
654 length_changes1 = length_changes2
655 length_changes2 = 0
656 lastequality = diffs[pointer][1]
657 else:
658 length_changes2 += len(diffs[pointer][1])
659 if (lastequality != None and (len(lastequality) <= length_changes1) and
660 (len(lastequality) <= length_changes2)):
661
662 diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
663
664 diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
665 diffs[equalities[-1] + 1][1])
666
667 equalities.pop()
668
669 if len(equalities) != 0:
670 equalities.pop()
671 if len(equalities):
672 pointer = equalities[-1]
673 else:
674 pointer = -1
675 length_changes1 = 0
676 length_changes2 = 0
677 lastequality = None
678 changes = True
679 pointer += 1
680
681 if changes:
682 self.diff_cleanupMerge(diffs)
683
684 self.diff_cleanupSemanticLossless(diffs)
685
687 """Look for single edits surrounded on both sides by equalities
688 which can be shifted sideways to align the edit to a word boundary.
689 e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
690
691 Args:
692 diffs: Array of diff tuples.
693 """
694
695 def diff_cleanupSemanticScore(one, two):
696 """Given two strings, compute a score representing whether the
697 internal boundary falls on logical boundaries.
698 Scores range from 5 (best) to 0 (worst).
699 Closure, but does not reference any external variables.
700
701 Args:
702 one: First string.
703 two: Second string.
704
705 Returns:
706 The score.
707 """
708 if not one or not two:
709
710 return 5
711
712
713
714
715
716
717 score = 0
718
719 if not one[-1].isalnum() or not two[0].isalnum():
720 score += 1
721
722 if one[-1].isspace() or two[0].isspace():
723 score += 1
724
725 if (one[-1] == "\r" or one[-1] == "\n" or
726 two[0] == "\r" or two[0] == "\n"):
727 score += 1
728
729 if (re.search("\\n\\r?\\n$", one) or
730 re.match("^\\r?\\n\\r?\\n", two)):
731 score += 1
732 return score
733
734 pointer = 1
735
736 while pointer < len(diffs) - 1:
737 if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
738 diffs[pointer + 1][0] == self.DIFF_EQUAL):
739
740 equality1 = diffs[pointer - 1][1]
741 edit = diffs[pointer][1]
742 equality2 = diffs[pointer + 1][1]
743
744
745 commonOffset = self.diff_commonSuffix(equality1, edit)
746 if commonOffset:
747 commonString = edit[-commonOffset:]
748 equality1 = equality1[:-commonOffset]
749 edit = commonString + edit[:-commonOffset]
750 equality2 = commonString + equality2
751
752
753 bestEquality1 = equality1
754 bestEdit = edit
755 bestEquality2 = equality2
756 bestScore = (diff_cleanupSemanticScore(equality1, edit) +
757 diff_cleanupSemanticScore(edit, equality2))
758 while edit and equality2 and edit[0] == equality2[0]:
759 equality1 += edit[0]
760 edit = edit[1:] + equality2[0]
761 equality2 = equality2[1:]
762 score = (diff_cleanupSemanticScore(equality1, edit) +
763 diff_cleanupSemanticScore(edit, equality2))
764
765 if score >= bestScore:
766 bestScore = score
767 bestEquality1 = equality1
768 bestEdit = edit
769 bestEquality2 = equality2
770
771 if diffs[pointer - 1][1] != bestEquality1:
772
773 if bestEquality1:
774 diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1)
775 else:
776 del diffs[pointer - 1]
777 pointer -= 1
778 diffs[pointer] = (diffs[pointer][0], bestEdit)
779 if bestEquality2:
780 diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2)
781 else:
782 del diffs[pointer + 1]
783 pointer -= 1
784 pointer += 1
785
787 """Reduce the number of edits by eliminating operationally trivial
788 equalities.
789
790 Args:
791 diffs: Array of diff tuples.
792 """
793 changes = False
794 equalities = []
795 lastequality = ''
796 pointer = 0
797 pre_ins = False
798 pre_del = False
799 post_ins = False
800 post_del = False
801 while pointer < len(diffs):
802 if diffs[pointer][0] == self.DIFF_EQUAL:
803 if (len(diffs[pointer][1]) < self.Diff_EditCost and
804 (post_ins or post_del)):
805
806 equalities.append(pointer)
807 pre_ins = post_ins
808 pre_del = post_del
809 lastequality = diffs[pointer][1]
810 else:
811
812 equalities = []
813 lastequality = ''
814
815 post_ins = post_del = False
816 else:
817 if diffs[pointer][0] == self.DIFF_DELETE:
818 post_del = True
819 else:
820 post_ins = True
821
822
823
824
825
826
827
828
829 if lastequality and ((pre_ins and pre_del and post_ins and post_del) or
830 ((len(lastequality) < self.Diff_EditCost / 2) and
831 (pre_ins + pre_del + post_ins + post_del) == 3)):
832
833 diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
834
835 diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
836 diffs[equalities[-1] + 1][1])
837 equalities.pop()
838 lastequality = ''
839 if pre_ins and pre_del:
840
841 post_ins = post_del = True
842 equalities = []
843 else:
844 if len(equalities):
845 equalities.pop()
846 if len(equalities):
847 pointer = equalities[-1]
848 else:
849 pointer = -1
850 post_ins = post_del = False
851 changes = True
852 pointer += 1
853
854 if changes:
855 self.diff_cleanupMerge(diffs)
856
858 """Reorder and merge like edit sections. Merge equalities.
859 Any edit section can move as long as it doesn't cross an equality.
860
861 Args:
862 diffs: Array of diff tuples.
863 """
864 diffs.append((self.DIFF_EQUAL, ''))
865 pointer = 0
866 count_delete = 0
867 count_insert = 0
868 text_delete = ''
869 text_insert = ''
870 while pointer < len(diffs):
871 if diffs[pointer][0] == self.DIFF_INSERT:
872 count_insert += 1
873 text_insert += diffs[pointer][1]
874 pointer += 1
875 elif diffs[pointer][0] == self.DIFF_DELETE:
876 count_delete += 1
877 text_delete += diffs[pointer][1]
878 pointer += 1
879 elif diffs[pointer][0] == self.DIFF_EQUAL:
880
881 if count_delete != 0 or count_insert != 0:
882 if count_delete != 0 and count_insert != 0:
883
884 commonlength = self.diff_commonPrefix(text_insert, text_delete)
885 if commonlength != 0:
886 x = pointer - count_delete - count_insert - 1
887 if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:
888 diffs[x] = (diffs[x][0], diffs[x][1] +
889 text_insert[:commonlength])
890 else:
891 diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength]))
892 pointer += 1
893 text_insert = text_insert[commonlength:]
894 text_delete = text_delete[commonlength:]
895
896 commonlength = self.diff_commonSuffix(text_insert, text_delete)
897 if commonlength != 0:
898 diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] +
899 diffs[pointer][1])
900 text_insert = text_insert[:-commonlength]
901 text_delete = text_delete[:-commonlength]
902
903 if count_delete == 0:
904 diffs[pointer - count_insert : pointer] = [
905 (self.DIFF_INSERT, text_insert)]
906 elif count_insert == 0:
907 diffs[pointer - count_delete : pointer] = [
908 (self.DIFF_DELETE, text_delete)]
909 else:
910 diffs[pointer - count_delete - count_insert : pointer] = [
911 (self.DIFF_DELETE, text_delete),
912 (self.DIFF_INSERT, text_insert)]
913 pointer = pointer - count_delete - count_insert + 1
914 if count_delete != 0:
915 pointer += 1
916 if count_insert != 0:
917 pointer += 1
918 elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:
919
920 diffs[pointer - 1] = (diffs[pointer - 1][0],
921 diffs[pointer - 1][1] + diffs[pointer][1])
922 del diffs[pointer]
923 else:
924 pointer += 1
925
926 count_insert = 0
927 count_delete = 0
928 text_delete = ''
929 text_insert = ''
930
931 if diffs[-1][1] == '':
932 diffs.pop()
933
934
935
936
937 changes = False
938 pointer = 1
939
940 while pointer < len(diffs) - 1:
941 if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
942 diffs[pointer + 1][0] == self.DIFF_EQUAL):
943
944 if diffs[pointer][1].endswith(diffs[pointer - 1][1]):
945
946 diffs[pointer] = (diffs[pointer][0],
947 diffs[pointer - 1][1] +
948 diffs[pointer][1][:-len(diffs[pointer - 1][1])])
949 diffs[pointer + 1] = (diffs[pointer + 1][0],
950 diffs[pointer - 1][1] + diffs[pointer + 1][1])
951 del diffs[pointer - 1]
952 changes = True
953 elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):
954
955 diffs[pointer - 1] = (diffs[pointer - 1][0],
956 diffs[pointer - 1][1] + diffs[pointer + 1][1])
957 diffs[pointer] = (diffs[pointer][0],
958 diffs[pointer][1][len(diffs[pointer + 1][1]):] +
959 diffs[pointer + 1][1])
960 del diffs[pointer + 1]
961 changes = True
962 pointer += 1
963
964
965 if changes:
966 self.diff_cleanupMerge(diffs)
967
969 """loc is a location in text1, compute and return the equivalent location
970 in text2. e.g. "The cat" vs "The big cat", 1->1, 5->8
971
972 Args:
973 diffs: Array of diff tuples.
974 loc: Location within text1.
975
976 Returns:
977 Location within text2.
978 """
979 chars1 = 0
980 chars2 = 0
981 last_chars1 = 0
982 last_chars2 = 0
983 for x in xrange(len(diffs)):
984 (op, text) = diffs[x]
985 if op != self.DIFF_INSERT:
986 chars1 += len(text)
987 if op != self.DIFF_DELETE:
988 chars2 += len(text)
989 if chars1 > loc:
990 break
991 last_chars1 = chars1
992 last_chars2 = chars2
993
994 if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:
995
996 return last_chars2
997
998 return last_chars2 + (loc - last_chars1)
999
1001 """Convert a diff array into a pretty HTML report.
1002
1003 Args:
1004 diffs: Array of diff tuples.
1005
1006 Returns:
1007 HTML representation.
1008 """
1009 html = []
1010 i = 0
1011 for (op, data) in diffs:
1012 text = (data.replace("&", "&").replace("<", "<")
1013 .replace(">", ">").replace("\n", "¶<BR>"))
1014 if op == self.DIFF_INSERT:
1015 html.append("<INS STYLE=\"background:#E6FFE6;\" TITLE=\"i=%i\">%s</INS>"
1016 % (i, text))
1017 elif op == self.DIFF_DELETE:
1018 html.append("<DEL STYLE=\"background:#FFE6E6;\" TITLE=\"i=%i\">%s</DEL>"
1019 % (i, text))
1020 elif op == self.DIFF_EQUAL:
1021 html.append("<SPAN TITLE=\"i=%i\">%s</SPAN>" % (i, text))
1022 if op != self.DIFF_DELETE:
1023 i += len(data)
1024 return "".join(html)
1025
1026 - def diff_text1(self, diffs):
1027 """Compute and return the source text (all equalities and deletions).
1028
1029 Args:
1030 diffs: Array of diff tuples.
1031
1032 Returns:
1033 Source text.
1034 """
1035 text = []
1036 for (op, data) in diffs:
1037 if op != self.DIFF_INSERT:
1038 text.append(data)
1039 return "".join(text)
1040
1041 - def diff_text2(self, diffs):
1042 """Compute and return the destination text (all equalities and insertions).
1043
1044 Args:
1045 diffs: Array of diff tuples.
1046
1047 Returns:
1048 Destination text.
1049 """
1050 text = []
1051 for (op, data) in diffs:
1052 if op != self.DIFF_DELETE:
1053 text.append(data)
1054 return "".join(text)
1055
1057 """Compute the Levenshtein distance; the number of inserted, deleted or
1058 substituted characters.
1059
1060 Args:
1061 diffs: Array of diff tuples.
1062
1063 Returns:
1064 Number of changes.
1065 """
1066 levenshtein = 0
1067 insertions = 0
1068 deletions = 0
1069 for (op, data) in diffs:
1070 if op == self.DIFF_INSERT:
1071 insertions += len(data)
1072 elif op == self.DIFF_DELETE:
1073 deletions += len(data)
1074 elif op == self.DIFF_EQUAL:
1075
1076 levenshtein += max(insertions, deletions)
1077 insertions = 0
1078 deletions = 0
1079 levenshtein += max(insertions, deletions)
1080 return levenshtein
1081
1083 """Crush the diff into an encoded string which describes the operations
1084 required to transform text1 into text2.
1085 E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.
1086 Operations are tab-separated. Inserted text is escaped using %xx notation.
1087
1088 Args:
1089 diffs: Array of diff tuples.
1090
1091 Returns:
1092 Delta text.
1093 """
1094 text = []
1095 for (op, data) in diffs:
1096 if op == self.DIFF_INSERT:
1097
1098 data = data.encode("utf-8")
1099 text.append("+" + urllib.quote(data, "!~*'();/?:@&=+$,# "))
1100 elif op == self.DIFF_DELETE:
1101 text.append("-%d" % len(data))
1102 elif op == self.DIFF_EQUAL:
1103 text.append("=%d" % len(data))
1104 return "\t".join(text)
1105
1107 """Given the original text1, and an encoded string which describes the
1108 operations required to transform text1 into text2, compute the full diff.
1109
1110 Args:
1111 text1: Source string for the diff.
1112 delta: Delta text.
1113
1114 Returns:
1115 Array of diff tuples.
1116
1117 Raises:
1118 ValueError: If invalid input.
1119 """
1120 if type(delta) == unicode:
1121
1122
1123 delta = delta.encode("ascii")
1124 diffs = []
1125 pointer = 0
1126 tokens = delta.split("\t")
1127 for token in tokens:
1128 if token == "":
1129
1130 continue
1131
1132
1133 param = token[1:]
1134 if token[0] == "+":
1135 param = urllib.unquote(param).decode("utf-8")
1136 diffs.append((self.DIFF_INSERT, param))
1137 elif token[0] == "-" or token[0] == "=":
1138 try:
1139 n = int(param)
1140 except ValueError:
1141 raise ValueError("Invalid number in diff_fromDelta: " + param)
1142 if n < 0:
1143 raise ValueError("Negative number in diff_fromDelta: " + param)
1144 text = text1[pointer : pointer + n]
1145 pointer += n
1146 if token[0] == "=":
1147 diffs.append((self.DIFF_EQUAL, text))
1148 else:
1149 diffs.append((self.DIFF_DELETE, text))
1150 else:
1151
1152 raise ValueError("Invalid diff operation in diff_fromDelta: " +
1153 token[0])
1154 if pointer != len(text1):
1155 raise ValueError(
1156 "Delta length (%d) does not equal source text length (%d)." %
1157 (pointer, len(text1)))
1158 return diffs
1159
1160
1161
1162 - def match_main(self, text, pattern, loc):
1163 """Locate the best instance of 'pattern' in 'text' near 'loc'.
1164
1165 Args:
1166 text: The text to search.
1167 pattern: The pattern to search for.
1168 loc: The location to search around.
1169
1170 Returns:
1171 Best match index or -1.
1172 """
1173
1174 if text == None or pattern == None:
1175 raise ValueError("Null inputs. (match_main)")
1176
1177 loc = max(0, min(loc, len(text)))
1178 if text == pattern:
1179
1180 return 0
1181 elif not text:
1182
1183 return -1
1184 elif text[loc:loc + len(pattern)] == pattern:
1185
1186 return loc
1187 else:
1188
1189 match = self.match_bitap(text, pattern, loc)
1190 return match
1191
1193 """Locate the best instance of 'pattern' in 'text' near 'loc' using the
1194 Bitap algorithm.
1195
1196 Args:
1197 text: The text to search.
1198 pattern: The pattern to search for.
1199 loc: The location to search around.
1200
1201 Returns:
1202 Best match index or -1.
1203 """
1204
1205
1206
1207
1208
1209 s = self.match_alphabet(pattern)
1210
1211 def match_bitapScore(e, x):
1212 """Compute and return the score for a match with e errors and x location.
1213 Accesses loc and pattern through being a closure.
1214
1215 Args:
1216 e: Number of errors in match.
1217 x: Location of match.
1218
1219 Returns:
1220 Overall score for match (0.0 = good, 1.0 = bad).
1221 """
1222 accuracy = float(e) / len(pattern)
1223 proximity = abs(loc - x)
1224 if not self.Match_Distance:
1225
1226 return proximity and 1.0 or accuracy
1227 return accuracy + (proximity / float(self.Match_Distance))
1228
1229
1230 score_threshold = self.Match_Threshold
1231
1232 best_loc = text.find(pattern, loc)
1233 if best_loc != -1:
1234 score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
1235
1236 best_loc = text.rfind(pattern, loc + len(pattern))
1237 if best_loc != -1:
1238 score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
1239
1240
1241 matchmask = 1 << (len(pattern) - 1)
1242 best_loc = -1
1243
1244 bin_max = len(pattern) + len(text)
1245
1246 last_rd = None
1247 for d in xrange(len(pattern)):
1248
1249
1250
1251 bin_min = 0
1252 bin_mid = bin_max
1253 while bin_min < bin_mid:
1254 if match_bitapScore(d, loc + bin_mid) <= score_threshold:
1255 bin_min = bin_mid
1256 else:
1257 bin_max = bin_mid
1258 bin_mid = (bin_max - bin_min) / 2 + bin_min
1259
1260
1261 bin_max = bin_mid
1262 start = max(1, loc - bin_mid + 1)
1263 finish = min(loc + bin_mid, len(text)) + len(pattern)
1264
1265 rd = range(finish + 1)
1266 rd.append((1 << d) - 1)
1267 for j in xrange(finish, start - 1, -1):
1268 if len(text) <= j - 1:
1269
1270 charMatch = 0
1271 else:
1272 charMatch = s.get(text[j - 1], 0)
1273 if d == 0:
1274 rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
1275 else:
1276 rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | (
1277 ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
1278 if rd[j] & matchmask:
1279 score = match_bitapScore(d, j - 1)
1280
1281
1282 if score <= score_threshold:
1283
1284 score_threshold = score
1285 best_loc = j - 1
1286 if best_loc > loc:
1287
1288 start = max(1, 2 * loc - best_loc)
1289 else:
1290
1291 break
1292
1293 if match_bitapScore(d + 1, loc) > score_threshold:
1294 break
1295 last_rd = rd
1296 return best_loc
1297
1299 """Initialise the alphabet for the Bitap algorithm.
1300
1301 Args:
1302 pattern: The text to encode.
1303
1304 Returns:
1305 Hash of character locations.
1306 """
1307 s = {}
1308 for char in pattern:
1309 s[char] = 0
1310 for i in xrange(len(pattern)):
1311 s[pattern[i]] |= 1 << (len(pattern) - i - 1)
1312 return s
1313
1314
1315
1316 - def patch_addContext(self, patch, text):
1317 """Increase the context until it is unique,
1318 but don't let the pattern expand beyond Match_MaxBits.
1319
1320 Args:
1321 patch: The patch to grow.
1322 text: Source text.
1323 """
1324 if len(text) == 0:
1325 return
1326 pattern = text[patch.start2 : patch.start2 + patch.length1]
1327 padding = 0
1328
1329
1330
1331 while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits ==
1332 0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin -
1333 self.Patch_Margin)):
1334 padding += self.Patch_Margin
1335 pattern = text[max(0, patch.start2 - padding) :
1336 patch.start2 + patch.length1 + padding]
1337
1338 padding += self.Patch_Margin
1339
1340
1341 prefix = text[max(0, patch.start2 - padding) : patch.start2]
1342 if prefix:
1343 patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]
1344
1345 suffix = text[patch.start2 + patch.length1 :
1346 patch.start2 + patch.length1 + padding]
1347 if suffix:
1348 patch.diffs.append((self.DIFF_EQUAL, suffix))
1349
1350
1351 patch.start1 -= len(prefix)
1352 patch.start2 -= len(prefix)
1353
1354 patch.length1 += len(prefix) + len(suffix)
1355 patch.length2 += len(prefix) + len(suffix)
1356
1358 """Compute a list of patches to turn text1 into text2.
1359 Use diffs if provided, otherwise compute it ourselves.
1360 There are four ways to call this function, depending on what data is
1361 available to the caller:
1362 Method 1:
1363 a = text1, b = text2
1364 Method 2:
1365 a = diffs
1366 Method 3 (optimal):
1367 a = text1, b = diffs
1368 Method 4 (deprecated, use method 3):
1369 a = text1, b = text2, c = diffs
1370
1371 Args:
1372 a: text1 (methods 1,3,4) or Array of diff tuples for text1 to
1373 text2 (method 2).
1374 b: text2 (methods 1,4) or Array of diff tuples for text1 to
1375 text2 (method 3) or undefined (method 2).
1376 c: Array of diff tuples for text1 to text2 (method 4) or
1377 undefined (methods 1,2,3).
1378
1379 Returns:
1380 Array of patch objects.
1381 """
1382 text1 = None
1383 diffs = None
1384
1385 if isinstance(a, basestring) and isinstance(b, basestring) and c is None:
1386
1387
1388 text1 = a
1389 diffs = self.diff_main(text1, b, True)
1390 if len(diffs) > 2:
1391 self.diff_cleanupSemantic(diffs)
1392 self.diff_cleanupEfficiency(diffs)
1393 elif isinstance(a, list) and b is None and c is None:
1394
1395
1396 diffs = a
1397 text1 = self.diff_text1(diffs)
1398 elif isinstance(a, basestring) and isinstance(b, list) and c is None:
1399
1400 text1 = a
1401 diffs = b
1402 elif (isinstance(a, basestring) and isinstance(b, basestring) and
1403 isinstance(c, list)):
1404
1405
1406 text1 = a
1407 diffs = c
1408 else:
1409 raise ValueError("Unknown call format to patch_make.")
1410
1411 if not diffs:
1412 return []
1413 patches = []
1414 patch = patch_obj()
1415 char_count1 = 0
1416 char_count2 = 0
1417 prepatch_text = text1
1418 postpatch_text = text1
1419 for x in xrange(len(diffs)):
1420 (diff_type, diff_text) = diffs[x]
1421 if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL:
1422
1423 patch.start1 = char_count1
1424 patch.start2 = char_count2
1425 if diff_type == self.DIFF_INSERT:
1426
1427 patch.diffs.append(diffs[x])
1428 patch.length2 += len(diff_text)
1429 postpatch_text = (postpatch_text[:char_count2] + diff_text +
1430 postpatch_text[char_count2:])
1431 elif diff_type == self.DIFF_DELETE:
1432
1433 patch.length1 += len(diff_text)
1434 patch.diffs.append(diffs[x])
1435 postpatch_text = (postpatch_text[:char_count2] +
1436 postpatch_text[char_count2 + len(diff_text):])
1437 elif (diff_type == self.DIFF_EQUAL and
1438 len(diff_text) <= 2 * self.Patch_Margin and
1439 len(patch.diffs) != 0 and len(diffs) != x + 1):
1440
1441 patch.diffs.append(diffs[x])
1442 patch.length1 += len(diff_text)
1443 patch.length2 += len(diff_text)
1444
1445 if (diff_type == self.DIFF_EQUAL and
1446 len(diff_text) >= 2 * self.Patch_Margin):
1447
1448 if len(patch.diffs) != 0:
1449 self.patch_addContext(patch, prepatch_text)
1450 patches.append(patch)
1451 patch = patch_obj()
1452
1453
1454
1455
1456 prepatch_text = postpatch_text
1457 char_count1 = char_count2
1458
1459
1460 if diff_type != self.DIFF_INSERT:
1461 char_count1 += len(diff_text)
1462 if diff_type != self.DIFF_DELETE:
1463 char_count2 += len(diff_text)
1464
1465
1466 if len(patch.diffs) != 0:
1467 self.patch_addContext(patch, prepatch_text)
1468 patches.append(patch)
1469 return patches
1470
1472 """Given an array of patches, return another array that is identical.
1473
1474 Args:
1475 patches: Array of patch objects.
1476
1477 Returns:
1478 Array of patch objects.
1479 """
1480 patchesCopy = []
1481 for patch in patches:
1482 patchCopy = patch_obj()
1483
1484 patchCopy.diffs = patch.diffs[:]
1485 patchCopy.start1 = patch.start1
1486 patchCopy.start2 = patch.start2
1487 patchCopy.length1 = patch.length1
1488 patchCopy.length2 = patch.length2
1489 patchesCopy.append(patchCopy)
1490 return patchesCopy
1491
1493 """Merge a set of patches onto the text. Return a patched text, as well
1494 as a list of true/false values indicating which patches were applied.
1495
1496 Args:
1497 patches: Array of patch objects.
1498 text: Old text.
1499
1500 Returns:
1501 Two element Array, containing the new text and an array of boolean values.
1502 """
1503 if not patches:
1504 return (text, [])
1505
1506
1507 patches = self.patch_deepCopy(patches)
1508
1509 nullPadding = self.patch_addPadding(patches)
1510 text = nullPadding + text + nullPadding
1511 self.patch_splitMax(patches)
1512
1513
1514
1515
1516
1517 delta = 0
1518 results = []
1519 for patch in patches:
1520 expected_loc = patch.start2 + delta
1521 text1 = self.diff_text1(patch.diffs)
1522 end_loc = -1
1523 if len(text1) > self.Match_MaxBits:
1524
1525
1526 start_loc = self.match_main(text, text1[:self.Match_MaxBits],
1527 expected_loc)
1528 if start_loc != -1:
1529 end_loc = self.match_main(text, text1[-self.Match_MaxBits:],
1530 expected_loc + len(text1) - self.Match_MaxBits)
1531 if end_loc == -1 or start_loc >= end_loc:
1532
1533 start_loc = -1
1534 else:
1535 start_loc = self.match_main(text, text1, expected_loc)
1536 if start_loc == -1:
1537
1538 results.append(False)
1539
1540 delta -= patch.length2 - patch.length1
1541 else:
1542
1543 results.append(True)
1544 delta = start_loc - expected_loc
1545 if end_loc == -1:
1546 text2 = text[start_loc : start_loc + len(text1)]
1547 else:
1548 text2 = text[start_loc : end_loc + self.Match_MaxBits]
1549 if text1 == text2:
1550
1551 text = (text[:start_loc] + self.diff_text2(patch.diffs) +
1552 text[start_loc + len(text1):])
1553 else:
1554
1555
1556 diffs = self.diff_main(text1, text2, False)
1557 if (len(text1) > self.Match_MaxBits and
1558 self.diff_levenshtein(diffs) / float(len(text1)) >
1559 self.Patch_DeleteThreshold):
1560
1561 results[-1] = False
1562 else:
1563 self.diff_cleanupSemanticLossless(diffs)
1564 index1 = 0
1565 for (op, data) in patch.diffs:
1566 if op != self.DIFF_EQUAL:
1567 index2 = self.diff_xIndex(diffs, index1)
1568 if op == self.DIFF_INSERT:
1569 text = text[:start_loc + index2] + data + text[start_loc +
1570 index2:]
1571 elif op == self.DIFF_DELETE:
1572 text = text[:start_loc + index2] + text[start_loc +
1573 self.diff_xIndex(diffs, index1 + len(data)):]
1574 if op != self.DIFF_DELETE:
1575 index1 += len(data)
1576
1577 text = text[len(nullPadding):-len(nullPadding)]
1578 return (text, results)
1579
1581 """Add some padding on text start and end so that edges can match
1582 something. Intended to be called only from within patch_apply.
1583
1584 Args:
1585 patches: Array of patch objects.
1586
1587 Returns:
1588 The padding string added to each side.
1589 """
1590 paddingLength = self.Patch_Margin
1591 nullPadding = ""
1592 for x in xrange(1, paddingLength + 1):
1593 nullPadding += chr(x)
1594
1595
1596 for patch in patches:
1597 patch.start1 += paddingLength
1598 patch.start2 += paddingLength
1599
1600
1601 patch = patches[0]
1602 diffs = patch.diffs
1603 if not diffs or diffs[0][0] != self.DIFF_EQUAL:
1604
1605 diffs.insert(0, (self.DIFF_EQUAL, nullPadding))
1606 patch.start1 -= paddingLength
1607 patch.start2 -= paddingLength
1608 patch.length1 += paddingLength
1609 patch.length2 += paddingLength
1610 elif paddingLength > len(diffs[0][1]):
1611
1612 extraLength = paddingLength - len(diffs[0][1])
1613 newText = nullPadding[len(diffs[0][1]):] + diffs[0][1]
1614 diffs[0] = (diffs[0][0], newText)
1615 patch.start1 -= extraLength
1616 patch.start2 -= extraLength
1617 patch.length1 += extraLength
1618 patch.length2 += extraLength
1619
1620
1621 patch = patches[-1]
1622 diffs = patch.diffs
1623 if not diffs or diffs[-1][0] != self.DIFF_EQUAL:
1624
1625 diffs.append((self.DIFF_EQUAL, nullPadding))
1626 patch.length1 += paddingLength
1627 patch.length2 += paddingLength
1628 elif paddingLength > len(diffs[-1][1]):
1629
1630 extraLength = paddingLength - len(diffs[-1][1])
1631 newText = diffs[-1][1] + nullPadding[:extraLength]
1632 diffs[-1] = (diffs[-1][0], newText)
1633 patch.length1 += extraLength
1634 patch.length2 += extraLength
1635
1636 return nullPadding
1637
1639 """Look through the patches and break up any which are longer than the
1640 maximum limit of the match algorithm.
1641
1642 Args:
1643 patches: Array of patch objects.
1644 """
1645 if self.Match_MaxBits == 0:
1646 return
1647 for x in xrange(len(patches)):
1648 if patches[x].length1 > self.Match_MaxBits:
1649 bigpatch = patches[x]
1650
1651 del patches[x]
1652 x -= 1
1653 patch_size = self.Match_MaxBits
1654 start1 = bigpatch.start1
1655 start2 = bigpatch.start2
1656 precontext = ''
1657 while len(bigpatch.diffs) != 0:
1658
1659 patch = patch_obj()
1660 empty = True
1661 patch.start1 = start1 - len(precontext)
1662 patch.start2 = start2 - len(precontext)
1663 if precontext:
1664 patch.length1 = patch.length2 = len(precontext)
1665 patch.diffs.append((self.DIFF_EQUAL, precontext))
1666
1667 while (len(bigpatch.diffs) != 0 and
1668 patch.length1 < patch_size - self.Patch_Margin):
1669 (diff_type, diff_text) = bigpatch.diffs[0]
1670 if diff_type == self.DIFF_INSERT:
1671
1672 patch.length2 += len(diff_text)
1673 start2 += len(diff_text)
1674 patch.diffs.append(bigpatch.diffs.pop(0))
1675 empty = False
1676 elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and
1677 patch.diffs[0][0] == self.DIFF_EQUAL and
1678 len(diff_text) > 2 * patch_size):
1679
1680 patch.length1 += len(diff_text)
1681 start1 += len(diff_text)
1682 empty = False
1683 patch.diffs.append((diff_type, diff_text))
1684 del bigpatch.diffs[0]
1685 else:
1686
1687 diff_text = diff_text[:patch_size - patch.length1 -
1688 self.Patch_Margin]
1689 patch.length1 += len(diff_text)
1690 start1 += len(diff_text)
1691 if diff_type == self.DIFF_EQUAL:
1692 patch.length2 += len(diff_text)
1693 start2 += len(diff_text)
1694 else:
1695 empty = False
1696
1697 patch.diffs.append((diff_type, diff_text))
1698 if diff_text == bigpatch.diffs[0][1]:
1699 del bigpatch.diffs[0]
1700 else:
1701 bigpatch.diffs[0] = (bigpatch.diffs[0][0],
1702 bigpatch.diffs[0][1][len(diff_text):])
1703
1704
1705 precontext = self.diff_text2(patch.diffs)
1706 precontext = precontext[-self.Patch_Margin:]
1707
1708 postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin]
1709 if postcontext:
1710 patch.length1 += len(postcontext)
1711 patch.length2 += len(postcontext)
1712 if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
1713 patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +
1714 postcontext)
1715 else:
1716 patch.diffs.append((self.DIFF_EQUAL, postcontext))
1717
1718 if not empty:
1719 x += 1
1720 patches.insert(x, patch)
1721
1722 - def patch_toText(self, patches):
1723 """Take a list of patches and return a textual representation.
1724
1725 Args:
1726 patches: Array of patch objects.
1727
1728 Returns:
1729 Text representation of patches.
1730 """
1731 text = []
1732 for patch in patches:
1733 text.append(str(patch))
1734 return "".join(text)
1735
1736 - def patch_fromText(self, textline):
1737 """Parse a textual representation of patches and return a list of patch
1738 objects.
1739
1740 Args:
1741 textline: Text representation of patches.
1742
1743 Returns:
1744 Array of patch objects.
1745
1746 Raises:
1747 ValueError: If invalid input.
1748 """
1749 if type(textline) == unicode:
1750
1751
1752 textline = textline.encode("ascii")
1753 patches = []
1754 if not textline:
1755 return patches
1756 text = textline.split('\n')
1757 while len(text) != 0:
1758 m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
1759 if not m:
1760 raise ValueError("Invalid patch string: " + text[0])
1761 patch = patch_obj()
1762 patches.append(patch)
1763 patch.start1 = int(m.group(1))
1764 if m.group(2) == '':
1765 patch.start1 -= 1
1766 patch.length1 = 1
1767 elif m.group(2) == '0':
1768 patch.length1 = 0
1769 else:
1770 patch.start1 -= 1
1771 patch.length1 = int(m.group(2))
1772
1773 patch.start2 = int(m.group(3))
1774 if m.group(4) == '':
1775 patch.start2 -= 1
1776 patch.length2 = 1
1777 elif m.group(4) == '0':
1778 patch.length2 = 0
1779 else:
1780 patch.start2 -= 1
1781 patch.length2 = int(m.group(4))
1782
1783 del text[0]
1784
1785 while len(text) != 0:
1786 if text[0]:
1787 sign = text[0][0]
1788 else:
1789 sign = ''
1790 line = urllib.unquote(text[0][1:])
1791 line = line.decode("utf-8")
1792 if sign == '+':
1793
1794 patch.diffs.append((self.DIFF_INSERT, line))
1795 elif sign == '-':
1796
1797 patch.diffs.append((self.DIFF_DELETE, line))
1798 elif sign == ' ':
1799
1800 patch.diffs.append((self.DIFF_EQUAL, line))
1801 elif sign == '@':
1802
1803 break
1804 elif sign == '':
1805
1806 pass
1807 else:
1808
1809 raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line))
1810 del text[0]
1811 return patches
1812
1813
1815 """Class representing one patch operation.
1816 """
1817
1819 """Initializes with an empty list of diffs.
1820 """
1821 self.diffs = []
1822 self.start1 = None
1823 self.start2 = None
1824 self.length1 = 0
1825 self.length2 = 0
1826
1828 """Emmulate GNU diff's format.
1829 Header: @@ -382,8 +481,9 @@
1830 Indicies are printed as 1-based, not 0-based.
1831
1832 Returns:
1833 The GNU diff string.
1834 """
1835 if self.length1 == 0:
1836 coords1 = str(self.start1) + ",0"
1837 elif self.length1 == 1:
1838 coords1 = str(self.start1 + 1)
1839 else:
1840 coords1 = str(self.start1 + 1) + "," + str(self.length1)
1841 if self.length2 == 0:
1842 coords2 = str(self.start2) + ",0"
1843 elif self.length2 == 1:
1844 coords2 = str(self.start2 + 1)
1845 else:
1846 coords2 = str(self.start2 + 1) + "," + str(self.length2)
1847 text = ["@@ -", coords1, " +", coords2, " @@\n"]
1848
1849 for (op, data) in self.diffs:
1850 if op == diff_match_patch.DIFF_INSERT:
1851 text.append("+")
1852 elif op == diff_match_patch.DIFF_DELETE:
1853 text.append("-")
1854 elif op == diff_match_patch.DIFF_EQUAL:
1855 text.append(" ")
1856
1857 data = data.encode("utf-8")
1858 text.append(urllib.quote(data, "!~*'();/?:@&=+$,# ") + "\n")
1859 return "".join(text)
1860