1 """
2 a module containing L{SimpleStringTree}, which represents an editable string
3 as a dependency tree of edits. This module exists to support L{UnorderedString}.
4
5 This module was developed in part as a Google Summer of Code 2009
6 project.
7 """
8 import random
9 random.seed()
10
11
12
13 from collections import defaultdict
14 import dbus
15 import logging
16 import aatree
17
18 inf = float('inf')
19
21 """Each Change represents a change to the StringTree.
22 """
23 - def __init__(self, unique_id, parent, edit):
24 """
25 @param unique_id: a unique identifier for this Change
26 @param parent: the unique_id that is affected by this
27 Change. Parent unique_id are always associated with an Insertion edit.
28 @param edit: an Insertion, Deletion, or Removal. It's what is to be done to
29 the parent"""
30 self.unique_id = unique_id
31 self.parent = parent
32 self.edit = edit
33
35 return "Change(%d, %d, %s)" % (self.unique_id, self.parent, str(self.edit))
36
38 """Represents the action of inserting a particular bit of text
39 """
41 """
42 @param position: the point at which to insert text, in the unmodified
43 coordinates of the parent node (other insertions and deletions
44 to that node do not affect these coordinates)
45 @type position: int
46 @param text: the string to insert
47 @type text: str
48 """
49 self.position = position
50 self.text = text
51
53 return "Insertion(%d, %s)" % (self.position, self.text)
54
56 """
57 Represents the deletion of only those characters present in the parent in
58 this range. Characters that have been inserted into the parent by
59 an Insertion are not affected.
60 """
62 """
63 @type position: int >= 0
64 @param position: the point, in the unmodified coordinates of the parent
65 node, at which to start deletion
66 @type length: int > 0
67 @param length: the number of characters to delete"""
68 self.position = position
69 self.length = length
70
72 return "Deletion(%d, %d)" % (self.position, self.length)
73
75 """
76 Not currently used.
77
78 Represents the deletion of all characters ever inserted in this range.
79 Insertions at the endpoints are _not_ included in the Removal, and must
80 be Removed separately.
81 """
83 """
84 @param position: the point at which to start Removal
85 @param length: the number of points in the unmodified parent coordinate to
86 the end of the Removal. Note that many more than length characters
87 can be removed if there are Insertions in this range."""
88 self.position = position
89 self.length = length
90
92 return "Removal(%d, %d)" % (self.position, self.length)
93
95 """
96 Each Record is used to store one Change inside the StringTree.
97 The purpose of a Record is contain both the Change itself, and any cached
98 information about the current effect of that Change.
99 """
101 """
102 @type change: L{Change}
103 @type depth: int
104 @param depth: the number of parent edits between this edit and the root
105 of the change tree. This state is maintained because the editing
106 algorithm requires that the parent of a new Change not have any
107 remaining descendants that could serve as parent instead.
108 """
109 self.change = change
110 self.depth = depth
111
113 return "Record(%s, %d)" % (str(self.change), self.depth)
114
115 __repr__ = __str__
116
118 """Disused. Recursively flattens nested lists."""
119 o = []
120 for x in L:
121 if isinstance(x,list):
122 o.extend(flatten(x))
123 else:
124 o.append(x)
125 return o
126
128 """@return: a large random number that will fit into a dbus.Int64"""
129 return random.getrandbits(63)
130
132 """A translator for L{Change}s"""
133 if pack:
134 if isinstance(c.edit, Insertion):
135 return dbus.Struct((dbus.Int64(c.unique_id),
136 dbus.Int64(c.parent),
137 dbus.Int16(0),
138 dbus.Int64(c.edit.position),
139 dbus.UTF8String(c.edit.text)),
140 signature='xxnxs')
141 elif isinstance(c.edit, Deletion):
142 return dbus.Struct((dbus.Int64(c.unique_id),
143 dbus.Int64(c.parent),
144 dbus.Int16(1),
145 dbus.Int64(c.edit.position),
146 dbus.Int64(c.edit.length)),
147 signature='xxnxx')
148 elif isinstance(c.edit, Removal):
149 return dbus.Struct((dbus.Int64(c.unique_id),
150 dbus.Int64(c.parent),
151 dbus.Int16(2),
152 dbus.Int64(c.edit.position),
153 dbus.Int64(c.edit.length)),
154 signature='xxnxx')
155 else:
156 raise Unimplemented
157 else:
158 if c[2] == 0:
159 ed = Insertion(int(c[3]),str(c[4]))
160 elif c[2] == 1:
161 ed = Deletion(int(c[3]),int(c[4]))
162 elif c[2] == 2:
163 ed = Removal(int(c[3]),int(c[4]))
164 else:
165 raise "unknown type"
166 return Change(int(c[0]), int(c[1]), ed)
167
169 """
170 EagerHideList provides a list with hidden elements. The standard index
171 considers only the visible elements, but the 'all' index considers all
172 elements. The standard index of an invisible element is considered to be
173 the index of the next visible element.
174
175 EagerHideList has S{Theta}(N) time requirements for random insertion and
176 deletion. L{TreeHideList} can perform these operations in S{Theta}(log(N))
177 time, and so is generally preferred. EagerHideList is no longer used by
178 SimpleStringTree."""
180 self._sourcelist = []
181 self._poslist = []
182 self._posmap = {}
184 return len(self._sourcelist)
188 return self._sourcelist[s]
190 x = self._poslist[self._posmap[item]]
191 return x[2]
192 - def hide(self, position, length):
193 """
194 Makes currently visible elements hidden.
195 @type position: int
196 @param position: a position in the standard index (i.e. excluding
197 hidden elements)
198 @type length: int > 0
199 @param length: the number of elements to hide
200 """
201 pfirst = self._sourcelist[position]
202 plast = self._sourcelist[position+length-1]
203 ifirst = self._posmap[pfirst]
204 ilast = self._posmap[plast]
205 for i in xrange(ifirst, ilast+1):
206 L = self._poslist[i]
207 L[1] = False
208 L[2] = position
209 for L in self._poslist[ilast+1:]:
210 L[2] -= length
211
212 del self._sourcelist[position:(position+length)]
213
215 if isinstance(s, int):
216 return self._poslist[s][0]
217 else:
218 return [x[0] for x in self._poslist[s]]
220 return self._posmap[item]
222 return self._poslist[i][1]
224 return self._poslist[self._posmap[item]][1]
226 """Insert sequence with visibility into the all-coordinates at position"""
227 if position > 0:
228 psource = self._poslist[position][2]
229 else:
230 psource = 0
231 length = len(sequence)
232 newlist = []
233 newlistsource = []
234 i = psource
235 for elt, viz in zip(sequence, visibility):
236 newlist.append([elt, viz, i])
237 if viz:
238 newlistsource.append(elt)
239 i += 1
240 self._poslist[position:position] = newlist
241 for i in xrange(position,position+length):
242 L = self._poslist[i]
243 self._posmap[L[0]] = i
244 num_viz = len(newlistsource)
245 for i in xrange(position+length,len(self._poslist)):
246 L = self._poslist[i]
247 L[2] += num_viz
248 self._posmap[L[0]] = i
249 self._sourcelist[psource:psource] = newlistsource
250
253
255 assert len(self._posmap) == len(self._poslist)
256 for i in xrange(len(self._poslist)):
257 assert self._posmap[self._poslist[i][0]] == i
258 if self._poslist[i][1]:
259 assert self._sourcelist[self._poslist[i][2]] == self._poslist[i][0]
260 if i > 0:
261 if self._poslist[i-1][1]:
262 assert self._poslist[i-1][2] + 1 == self._poslist[i][2]
263 else:
264 assert self._poslist[i-1][2] == self._poslist[i][2]
265
267 """
268 a class implementing an editable string, represented internally as a
269 dependency tree of L{Change}s.
270
271 Purpose
272 =======
273
274 The logical structure of a SimpleStringTree is derived from common version
275 control systems such as CVS. In a version control system, each edit is
276 represented as a patch, also known as a diff, that can be applied to the
277 previous version of a document to generate the next version. These patches
278 are typically generated on a line-by-line basis, with the line to be edited
279 described either by its absolute position, or by its position relative to
280 some "context" (i.e. nearby lines).
281
282 When two people submit patches that modify the same line, the two patches
283 are said to "conflict", because they cannot both be applied, at least not
284 while retaining a semblance of their original meaning. These conflicts are
285 reported to the user and must be resolved manually.
286
287 When editing computer source code, a conflict typically indicates that if
288 both edits are applied, the
289 program will crash. For human languages, however, the requirements are not
290 so strict.
291
292 In real-time collaborative text editing, conflicts can most
293 easily be resolved by applying both edits and then fixing the text within
294 the editor. This procedure only works if all editors see the same text
295 when a conflict occurs. Thus, the purpose of SimpleStringTree is to ensure
296 that, no matter what sequence of edits is made, all users who have seen all
297 edits are presented with the same string.
298
299 API
300 ===
301 SimpleStringTree provides a "native" API based on insert() and delete().
302 To ease testing, it additionally provides a number of methods such as seek()
303 and write() that mirror the API of python's StringIO module. These methods
304 are in place principally to enable direct testing against python's StringIO
305 module, which should produce identical results when these methods are used.
306
307 The name
308 ========
309 SimpleStringTree is "Simple" in that it supports only Insertions and
310 Deletions. Handling References, while valuable, has proven quite difficult,
311 and so will not be addressed by this data structure.
312
313 Some code for handling Removals will be left in for the moment, since it is
314 easy enough, even though it is not presently used."""
315
326
328 return "\n".join((str(v) for v in self._id2rec.values()))
329
331 if r is None:
332 r = self.ROOT
333 s = "".join(self._id2rec[x[0]].change.edit.text[x[1]] for x in self._listing)
334 return s
335
338
343
346
347 - def read(self, size=float('inf')):
348
349 s = self.getvalue()
350 outpoint = min(len(s), self._cursor + size)
351 inpoint = self._cursor
352 self._cursor = outpoint
353 return s[inpoint:outpoint]
354
356
357 s = self.getvalue()
358 outpoint = min(len(s), self._cursor + size)
359 inpoint = self._cursor
360 i = s.find("\n", inpoint, outpoint)
361 if i == -1 or i >= outpoint:
362 self._cursor = outpoint
363 return s[inpoint:outpoint]
364 else:
365 self._cursor = i + 1
366 return s[inpoint:(i+1)]
367
369
370 s = self.getvalue()
371 t = s[self._cursor:]
372 self._cursor = len(s)
373 return t.splitlines(True)
374
375 - def seek(self, offset, whence=0):
376 if whence == 0:
377 self._cursor = offset
378 elif whence == 1:
379 self._cursor += offset
380 elif whence == 2:
381 self._cursor = len(self._listing) + offset
382
385
387 if size is None:
388 size = self._cursor
389 return self.delete(size, len(self._listing) - size)
390
392 L = min(len(self._listing) - self._cursor, len(text))
393 changelist = []
394 if L > 0:
395 changelist.extend(self.delete(self._cursor, L))
396 changelist.extend(self.insert(text))
397 return changelist
398
400 s = "".join(sequence)
401 return self.write(s)
402
403
404
405 - def insert(self, text, k=None):
406 """
407 Insert text into the string, without overwriting any existing text.
408 @type text: str
409 @param text: the text to insert
410 @type k: int >= 0
411 @param k: the location at which to insert the text. If k is None,
412 then the current seek position is used
413 @raise None: raises an error if k is greater than the length of the
414 string.
415 """
416 if k is None:
417 k = self._cursor
418
419 if len(self._listing) == 0:
420 r = self.ROOT
421 uid = -1
422 inspoint = 0
423 elif k == 0:
424 (uid, inspoint) = self._listing[k]
425 r = self._id2rec[uid]
426 elif k < len(self._listing):
427
428
429
430
431
432
433 pL, pR = self._listing[(k-1):(k+1)]
434 (uidL, inspointL) = pL
435 (uidR, inspointR) = pR
436 rR = self._id2rec[uidR]
437 if uidL == uidR:
438
439
440 r = rR
441 uid = uidR
442 inspoint = inspointR
443 else:
444
445
446
447
448
449 rL = self._id2rec[uidL]
450 if rR.depth > rL.depth:
451 r = rR
452 uid = uidR
453 inspoint = inspointR
454 else:
455 r = rL
456 uid = uidL
457 inspoint = inspointL + 1
458 elif k == len(self._listing):
459 (uid,i) = self._listing[k-1]
460 r = self._id2rec[uid]
461 inspoint = i + 1
462 else:
463 raise
464
465 e = Insertion(inspoint, text)
466 c = Change(my_rand(), r.change.unique_id, e)
467 self._add_change_treeonly(c)
468 target = (uid, inspoint)
469 self._insert_listonly(c.unique_id, target, len(text))
470 self._cursor = k + len(text)
471 return [c]
472
474 """
475 Starting at a point k (0-indexed), delete n characters
476 @type k: int
477 @param k: the position at which to begin deletion. The character at
478 position k will be deleted.
479 @type n: int
480 @param n: the number of characters to delete
481 @raise None: raises an error if the deletion extends beyond the end of
482 the string."""
483 if k + n > len(self._listing):
484 raise
485 p = self._listing[k]
486 contigs = [[p[0],p[1],p[1]]]
487 for (uid, index) in self._listing[(k+1):(k+n)]:
488
489
490
491
492 if contigs[-1][0] == uid:
493 contigs[-1][2] = index
494 else:
495 contigs.append([uid,index,index])
496 changelist = [Change(my_rand(), c[0], Deletion(c[1],1 + c[2]-c[1])) for c in contigs]
497 for c in changelist:
498 self._add_change_treeonly(c)
499 self._delete_listonly(k,n)
500 return changelist
501
503 todo = [(rec, point, m, None)]
504 ranges = []
505 while len(todo) > 0:
506 (rec, point, m, p) = todo[0]
507 h = self._range_helper(point, m)
508 self._step(h, rec)
509 if h.outpoint is not None:
510 ranges.append((rec, h.point_parent, h.outpoint - h.point_parent))
511
512 todo.extend(h.todo)
513
514 del todo[0]
515 return ranges
516
517 - def move(self, rempoint, n, inspoint):
518 """
519 Move a substring. Equivalent to read(), delete(), and insert().
520 In StringTree, move() would ideally coherently copy a section of text,
521 such that any subsequent edits appear in the new location, not the old.
522 In SimpleStringTree, this is not possible, so move() just falls back to
523 Deletion and Insertion.
524 @type rempoint: int
525 @param rempoint: the point at which to start deletion
526 @type n: int
527 @param n: the number of characters to move
528 @param inspoint: the position at which to insert"""
529 self.seek(rempoint)
530 t = self.read(n)
531
532 if rempoint > inspoint:
533 L = self.delete(rempoint,n)
534 L.extend(self.insert(t, inspoint))
535 else:
536 L = self.insert(t, inspoint)
537 L.extend(self.delete(rempoint,n))
538 return L
539
540
541
543 """
544 Apply a change to the string.
545 @type c: L{Change}
546 @param c: a L{Change} to make to the string. c.parent MUST already
547 have been added.
548 """
549 if c.unique_id in self._id2rec:
550 return []
551 if isinstance(c.edit, Insertion):
552 p = self._effective_parent(c.unique_id, c.parent, c.edit.position)
553 i = self._listing.index(p)
554 d = len(c.edit.text)
555 self._insert_listonly(c.unique_id, p, d)
556 flist = [Insertion(i, c.edit.text)]
557 elif isinstance(c.edit, Deletion):
558 flist = []
559 start = None
560 end = None
561 for i in xrange(c.edit.position,c.edit.position + c.edit.length):
562 p = (c.parent, i)
563 if self._listing.is_visible_item(p):
564 q = self._listing.index(p)
565 if end == q:
566 end += 1
567 else:
568 if end is not None:
569 n = end - start
570 flist.append(Deletion(start,n))
571 self._delete_listonly(start,n)
572 start = q
573 end = start + 1
574 if end is not None:
575 n = end - start
576 flist.append(Deletion(start,n))
577 self._delete_listonly(start,n)
578 else:
579 raise
580 self._add_change_treeonly(c)
581 return flist
582
584 """
585 The effective parent of an insertion is defined as the (uid, loc) pair
586 that causes it to be inserted in the right location in the string. This
587 is only relevant in the event of a conflict, in which case the conflict
588 edits are required to be ordered from least uid to greatest uid. The
589 effective parent of a conflicted edit, then, is either the original pair,
590 or (u_next, 0), where u_next is the uid of the sibling directly to the right
591 of the input uid. That is to say, it is the least u greater than uid
592 that has the same position."""
593 if parentid in self._parent2children:
594 u_next = None
595 for r in self._parent2children[parentid]:
596 u = r.change.unique_id
597 p = r.change.edit.position
598 if (p == position) and isinstance(r.change.edit, Insertion) and (u > uid):
599 if (u_next is None) or (u < u_next):
600 u_next = u
601 if u_next is not None:
602 return (u_next, 0)
603 return (parentid, position)
604
606 """Given the position in _sourcelist, and the length of the deletion,
607 perform the deletion in the lists"""
608 self._listing.hide(position,length)
609
611 """Make a new insertion into the lists with uid and length at position
612 in _poslist"""
613 elts = [(uid,i) for i in xrange(length+1)]
614 visibility = [True] * length
615 visibility.append(False)
616 self._listing.insert_sequence_leftof(target, elts, visibility)
617
619 if c.unique_id in self._id2rec:
620 return
621 d = self._id2rec[c.parent].depth + 1
622 r = Record(c,d)
623 self._id2rec[c.unique_id] = r
624 if c.parent not in self._parent2children:
625 self._parent2children[c.parent] = set()
626 self._parent2children[c.parent].add(r)
627
629 """
630 FIXME: This method could be useful, but it no longer works
631 get_insert finds and returns the youngest insertion containing positions
632 k to k + n in the total coordinates of parent. If parent is unspecified,
633 then it is taken to be the root, meaning the coordinates in question
634 are the current document coordinates.
635
636 The return value is a tuple (rec, k_unmodified, k_modified), meaning
637 the record containing k and k + n, the position of k in rec's
638 unmodified coordinates, and the position of k in rec's
639 modified coordinates.
640
641 "containing" is defined so that a record with n characters
642 (labeled 0...n-1) can still be returned as containing k...k+n. In
643 other words, each Insertion contains both endpoints. However, an
644 Insertion that has been totally deleted is ignored entirely."""
645 if parent is None:
646 parent = self.ROOT
647
648 h = self._get_insert_helper(k, n)
649 self._step(h, parent)
650 while h.child is not None:
651 parent = h.child
652 h = self._get_insert_helper(h.child_k, n)
653 self._step(h, parent)
654
655 return (parent, h.k_in_parent, h.k)
656
658 """
659 get_changes provides a depth-first topologically sorted list of all
660 Changes in this Tree.
661 @rtype: iterable(L{Change})
662 @return: a list of changes to the tree, ordering according to a
663 topological sort of the dependency tree. This ordering guarantees
664 that no change appears until after all of its dependencies (i.e.
665 ancestors)."""
666 L = []
667 stack = [-1]
668 while stack:
669 x = self._id2rec[stack.pop()].change
670 L.append(x)
671 if x.unique_id in self._parent2children:
672 stack.extend(r.change.unique_id
673 for r in self._parent2children[x.unique_id])
674 return L
675
677 """
678 Determin whether a Change c may safely be added
679 to the Tree. Specifically, it checks whether c.parent is already known.
680 @rtype: bool
681 @return: c.parent is already present in the tree."""
682 return c.parent in self._id2rec
683