Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue#18: internal_co: Always use the internal logical lines of RCSStream instead of the flat plain text to reproduce the next revision. #22

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 33 additions & 21 deletions cvs2svn_lib/checkout_internal.py
Original file line number Diff line number Diff line change
@@ -102,7 +102,10 @@


class TextRecord(object):
"""Bookkeeping data for the text of a single CVSRevision."""
"""Bookkeeping data for the text of a single CVSRevision.

The text is stored as the list of logical lines manipulated
by RCSStream."""

__slots__ = ['id', 'refcount']

@@ -134,15 +137,23 @@ def decrement_refcount(self, text_record_db):
if self.refcount == 0:
text_record_db.discard(self.id)

def checkout(self, text_record_db):
def checkout_as_lines(self, text_record_db):
"""Workhorse of the checkout process.

Return the text for this revision, decrement our reference count,
and update the databases depending on whether there will be future
checkouts."""
Return the text for this revision as a list of logical lines,
decrement our reference count, and update the databases depending
on whether there will be future checkouts."""

raise NotImplementedError()

def checkout(self, text_record_db):
"""Return the text for this revision.

Just as checkout_as_lines(), but returns the text as a flat text
string."""

return "".join(self.checkout_as_lines(text_record_db))

def free(self, text_record_db):
"""This instance will never again be checked out; free it.

@@ -158,7 +169,7 @@ class FullTextRecord(TextRecord):
These records are used for revisions whose fulltext was determined
by the InternalRevisionCollector during FilterSymbolsPass. The
fulltext for such a revision is stored in the delta_db as a
single string."""
list of logical lines manipulated by RCSStream."""

__slots__ = []

@@ -168,10 +179,10 @@ def __getstate__(self):
def __setstate__(self, state):
(self.id, self.refcount,) = state

def checkout(self, text_record_db):
text = text_record_db.delta_db[self.id]
def checkout_as_lines(self, text_record_db):
lines = text_record_db.delta_db[self.id]
self.decrement_refcount(text_record_db)
return text
return lines

def free(self, text_record_db):
del text_record_db.delta_db[self.id]
@@ -205,26 +216,26 @@ def __setstate__(self, state):
def increment_dependency_refcounts(self, text_record_db):
text_record_db[self.pred_id].refcount += 1

def checkout(self, text_record_db):
base_text = text_record_db[self.pred_id].checkout(text_record_db)
rcs_stream = RCSStream(base_text)
def checkout_as_lines(self, text_record_db):
base_lines = text_record_db[self.pred_id].checkout_as_lines(text_record_db)
rcs_stream = RCSStream(base_lines)
delta_text = text_record_db.delta_db[self.id]
rcs_stream.apply_diff(delta_text)
text = rcs_stream.get_text()
lines = rcs_stream.get_lines()
del rcs_stream
self.refcount -= 1
if self.refcount == 0:
# This text will never be needed again; just delete ourselves
# These lines will never be needed again; just delete ourselves
# without ever having stored the fulltext to the checkout
# database:
del text_record_db[self.id]
else:
# Store a new CheckedOutTextRecord in place of ourselves:
text_record_db.checkout_db['%x' % self.id] = text
text_record_db.checkout_db['%x' % self.id] = lines
new_text_record = CheckedOutTextRecord(self.id)
new_text_record.refcount = self.refcount
text_record_db.replace(new_text_record)
return text
return lines

def free(self, text_record_db):
del text_record_db.delta_db[self.id]
@@ -241,7 +252,8 @@ class CheckedOutTextRecord(TextRecord):

These records are used for revisions whose fulltext has been
computed already during OutputPass. The fulltext for such a
revision is stored in the text_record_db as a single string."""
revision is stored in the text_record_db as a list of logical
lines manipulated by RCSStream."""

__slots__ = []

@@ -251,10 +263,10 @@ def __getstate__(self):
def __setstate__(self, state):
(self.id, self.refcount,) = state

def checkout(self, text_record_db):
text = text_record_db.checkout_db['%x' % self.id]
def checkout_as_lines(self, text_record_db):
lines = text_record_db.checkout_db['%x' % self.id]
self.decrement_refcount(text_record_db)
return text
return lines

def free(self, text_record_db):
del text_record_db.checkout_db['%x' % self.id]
@@ -533,7 +545,7 @@ def set_revision_info(self, revision, log, text):
# This is revision 1.1. Write its fulltext:
text_record = FullTextRecord(cvs_rev_id)
self.revision_collector._writeout(
text_record, self._rcs_stream.get_text()
text_record, self._rcs_stream.get_lines()
)

# There will be no more trunk revisions delivered, so free the
10 changes: 9 additions & 1 deletion cvs2svn_lib/rcs_stream.py
Original file line number Diff line number Diff line change
@@ -174,13 +174,21 @@ class RCSStream:
def __init__(self, text):
"""Instantiate and initialize the file content with TEXT."""

self.set_text(text)
if isinstance(text, bytes):
self.set_text(text)
else:
self.set_lines(text)

def get_text(self):
"""Return the current file content."""

return "".join(self._lines)

def get_lines(self):
"""Return the current file content as list of logical lines."""

return self._lines

def set_lines(self, lines):
"""Set the current contents to the specified LINES.

45 changes: 40 additions & 5 deletions run-tests.py
Original file line number Diff line number Diff line change
@@ -3345,6 +3345,40 @@ def internal_co_keywords():
raise Failure()


@Cvs2SvnTestFunction
def internal_co_broken_rcsfile():
  "check if internal co can handle broken delta"

  # This is a test for GitHub issue 18.

  # Check that internal co can handle an irregular delta that inserts an
  # unterminated line somewhere other than after the last line.  Such a
  # "broken" RCS file does not appear to be producible by normal RCS/CVS
  # operation, but RCS/CVS can nevertheless handle it.
  #
  # The same repository is converted twice, once with --use-rcs and once
  # without it, and the two resulting dumpfiles are required to agree.

  ### TODO: Once an unterminated line has been inserted in the middle of
  ### the line buffer, there can be more critical situations than the one
  ### that irregular.txt,v exercises.  Test data for those should be added.

  rcs_conv = ensure_conversion(
      'gh-issue-18-irregular-rcsfile',
      args=['--use-rcs', '--default-eol=native'],
      dumpfile='broken-rcsfile-rcs.dump',
      )
  conv = ensure_conversion(
      'gh-issue-18-irregular-rcsfile',
      args=['--default-eol=native'],
      dumpfile='broken-rcsfile-int.dump',
      )
  if conv.output_found(r'WARNING\: internal problem\: leftover revisions'):
    raise Failure()

  # Read both dumpfiles as raw bytes.  The context managers make sure the
  # file handles are closed promptly instead of being left to the garbage
  # collector.
  with open(rcs_conv.dumpfile, 'rb') as rcs_dump:
    rcs_lines = list(rcs_dump)
  with open(conv.dumpfile, 'rb') as int_dump:
    lines = list(int_dump)

  # Compare all lines following the repository UUID:
  if lines[3:] != rcs_lines[3:]:
    raise Failure()


@Cvs2SvnTestFunction
def timestamp_chaos():
"test timestamp adjustments"
@@ -4237,10 +4271,11 @@ def vendor_1_1_not_root():
internal_co_exclude,
internal_co_trunk_only,
internal_co_keywords,
internal_co_broken_rcsfile,
leftover_revs,
requires_internal_co,
timestamp_chaos,
# 140:
timestamp_chaos,
symlinks,
empty_trunk_path,
preferred_parent_cycle,
@@ -4250,8 +4285,8 @@ def vendor_1_1_not_root():
add_on_branch,
main_git,
main_git2,
main_git_merged,
# 150:
main_git_merged,
main_git2_merged,
git_options,
main_hg,
@@ -4261,8 +4296,8 @@ def vendor_1_1_not_root():
EOLVariants('LF'),
EOLVariants('CR'),
EOLVariants('CRLF'),
EOLVariants('native'),
# 160:
EOLVariants('native'),
no_revs_file,
mirror_keyerror_test,
exclude_ntdb_test,
@@ -4272,8 +4307,8 @@ def vendor_1_1_not_root():
missing_deltatext,
transform_unlabeled_branch_name,
ignore_unlabeled_branch,
exclude_unlabeled_branch,
# 170:
exclude_unlabeled_branch,
unlabeled_branch_name_collision,
collision_with_unlabeled_branch_name,
many_deletes,
@@ -4283,8 +4318,8 @@ def vendor_1_1_not_root():
exclude_symbol_default,
add_on_branch2,
branch_from_vendor_branch,
strange_default_branch,
# 180:
strange_default_branch,
move_parent,
log_message_eols,
missing_vendor_branch,
122 changes: 122 additions & 0 deletions test-data/gh-issue-18-irregular-rcsfile-cvsrepos/irregular.txt,v
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
head 1.6;
access;
symbols;
locks; strict;
comment @# @;


1.6
date 2021.12.21.11.21.57; author futatuki; state Exp;
branches;
next 1.5;

1.5
date 2021.11.26.04.16.45; author futatuki; state Exp;
branches;
next 1.4;

1.4
date 2021.11.26.04.14.23; author futatuki; state Exp;
branches;
next 1.3;

1.3
date 2021.11.26.03.46.06; author futatuki; state Exp;
branches;
next 1.2;

1.2
date 2021.11.26.03.41.39; author futatuki; state Exp;
branches
1.2.1.1;
next 1.1;

1.1
date 2021.11.26.03.34.28; author futatuki; state Exp;
branches;
next ;

1.2.1.1
date 2021.12.21.11.44.31; author futatuki; state Exp;
branches;
next ;


desc
@create a new file irregular.txt
@


1.6
log
@r1.6:
@
text
@aaa
bbb
bc
cc
hhh@


1.5
log
@r1.5:
@
text
@d3 1
a3 1
c@


1.4
log
@r1.4:
@
text
@d5 1
a5 1
ggg@


1.3
log
@r1.3: s/eee/fff/
@
text
@d4 1
a4 1
fff@


1.2
log
@r1.2: s/ddd/eee/
@
text
@d4 1
a4 1
eee@


1.2.1.1
log
@rev:1.2.1.1
@
text
@d4 1
a4 2
ccc
eeeggg
@


1.1
log
@r1.1
@
text
@d4 1
a4 1
ddd@