# ignore
from typing import Dict, Callable, TextIO, List, Tuple, Set, Any, Type, Optional

# ignore
# Outcome markers; the calls to display_versions() use them as node labels.
PASS = "✔"
FAIL = "✘"
UNRESOLVED = "?"

# Color names for passing/failing outcomes (hex equivalents in the trailing
# comments). NOTE(review): not referenced by the code visible here.
PASS_COLOR = 'darkgreen'  # '#006400' # darkgreen
FAIL_COLOR = 'red4'  # '#8B0000' # darkred

# Node fill color and font that graph() applies to every diagram.
STEP_COLOR = 'peachpuff'
FONT_NAME = 'Raleway'

# ignore
def graph(comment: str = "default") -> Digraph:
    """Create an empty `Digraph` carrying the shared default styling.

    The graph is laid out left to right. Nodes are filled boxes colored
    `STEP_COLOR`; node and edge labels use the font `FONT_NAME`.
    `comment` is passed through to the `Digraph` constructor.
    """
    layout = {'rankdir': 'LR'}
    node_style = {
        'style': 'filled',
        'shape': 'box',
        'fillcolor': STEP_COLOR,
        'fontname': FONT_NAME,
    }
    edge_style = {'fontname': FONT_NAME}

    return Digraph(name='', comment=comment,
                   graph_attr=layout,
                   node_attr=node_style,
                   edge_attr=edge_style)

# ignore
VERSIONS = 8


def display_versions(outcomes: Dict[int, str]) -> None:
    """Display versions 1..`VERSIONS` as a chain of nodes `v1` -> `v2` -> ...

    `outcomes` maps a version number to an outcome marker (such as `PASS`,
    `FAIL`, or `UNRESOLVED`); the marker is appended to the label of the
    corresponding node. Versions without an entry show their name only.

    The diagram is shown via `display()`; nothing is returned.
    """
    state_machine = graph()
    previous_id = None  # name of the preceding node; None for the first one

    for version_number in range(1, VERSIONS + 1):
        node_id = f'v{version_number}'  # not `id`: avoid shadowing the builtin
        if version_number in outcomes:
            label = f'{node_id} {outcomes[version_number]}'
        else:
            label = node_id
        state_machine.node(node_id, label=label)

        # Link each version to its predecessor
        if previous_id is not None:
            state_machine.edge(previous_id, node_id)
        previous_id = node_id

    display(state_machine)

# ignore
display_versions({1: PASS, 8: FAIL})

# Name of the scratch directory in which the demo git repository is created
PROJECT = 'my_project'

# Start from a clean slate: remove the directory left by a previous run.
# A missing directory is fine; any other error is not suppressed.
try:
    shutil.rmtree(PROJECT)
except FileNotFoundError:
    pass
os.mkdir(PROJECT)

# Put the original working directory on the import path (presumably so that
# modules located there stay importable), then move into PROJECT.
# NOTE: from here on, relative paths resolve inside PROJECT.
sys.path.append(os.getcwd())
os.chdir(PROJECT)

!git init

hint: Using 'master' as the name for the initial branch. This default branch name
hint: is subject to change. To configure the initial branch name to use in all
hint: of your new repositories, which will suppress this warning, call:
hint:
hint: 	git config --global init.defaultBranch <name>
hint:
hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and
hint: 'development'. The just-created branch can be renamed via this command:
hint:
hint: 	git branch -m <name>
Initialized empty Git repository in /Users/zeller/Projects/debuggingbook/notebooks/my_project/.git/

!git config user.name "Demo User"

!git config user.email "demo-user@example.com"

!git config advice.detachedHead False

# First version: drops every character between '<' and '>' and copies
# everything else. It does not know about quotes, so
# remove_html_markup('"foo"') returns '"foo"'.
#
# NOTE: write_source() stores this function's text, as returned by
# inspect.getsource(), in the file that is committed to git. That text
# starts at the `def` line, so these leading comments stay out of the
# committed file; everything from `def` onward must be kept exactly as is.
def remove_html_markup(s):  # type: ignore
    tag = False
    out = ""

    for c in s:
        if c == '<':    # start of markup
            tag = True
        elif c == '>':  # end of markup
            tag = False
        elif not tag:
            out = out + c

    return out

def write_source(fun: Callable, filename: Optional[str] = None) -> None:
    """Write the source code of `fun` to a file.

    If `filename` is not given, the file is named after the function
    (`fun.__name__` plus `.py`). An existing file is overwritten.
    """
    target = (fun.__name__ + '.py') if filename is None else filename
    with open(target, 'w') as out:
        out.write(inspect.getsource(fun))

write_source(remove_html_markup)

print_file('remove_html_markup.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    out = ""

    for c in s:
        if c == '<':    # start of markup
            tag = True
        elif c == '>':  # end of markup
            tag = False
        elif not tag:
            out = out + c

    return out

!git add remove_html_markup.py

!git commit -m "First version"

[master (root-commit) a6a2ca5] First version
 1 file changed, 13 insertions(+)
 create mode 100644 remove_html_markup.py

# Second version: adds a `quote` flag; '<' and '>' are ignored while it
# is set.
#
# NOTE: `c == '"' or c == "'" and tag` is evaluated as
# `c == '"' or (c == "'" and tag)`. A double quote therefore toggles
# `quote` even outside of a tag and is never copied to `out`:
# remove_html_markup('"foo"') returns 'foo'. The notebook uses exactly this
# behavior as its running failure, so the code is left unchanged.
#
# NOTE: write_source() stores this function's text, as returned by
# inspect.getsource(), in the file that is committed to git. That text
# starts at the `def` line, so these leading comments stay out of the
# committed file; everything from `def` onward must be kept exactly as is.
def remove_html_markup(s):  # type: ignore
    tag = False
    quote = False
    out = ""

    for c in s:
        if c == '<' and not quote:
            tag = True
        elif c == '>' and not quote:
            tag = False
        elif c == '"' or c == "'" and tag:
            quote = not quote
        elif not tag:
            out = out + c

    return out

write_source(remove_html_markup)

!git diff remove_html_markup.py

diff --git a/remove_html_markup.py b/remove_html_markup.py
index 4999dc0..768bae9 100644
--- a/remove_html_markup.py
+++ b/remove_html_markup.py
@@ -1,12 +1,15 @@
 def remove_html_markup(s):  # type: ignore
     tag = False
+    quote = False
     out = ""
 
     for c in s:
-        if c == '<':    # start of markup
+        if c == '<' and not quote:
             tag = True
-        elif c == '>':  # end of markup
+        elif c == '>' and not quote:
             tag = False
+        elif c == '"' or c == "'" and tag:
+            quote = not quote
         elif not tag:
             out = out + c

!git commit -m "Second version" remove_html_markup.py

[master a9a4bcb] Second version
 1 file changed, 5 insertions(+), 2 deletions(-)

# Version committed as "Eighth version (with proper assertion)": tracks
# quotes and checks as a postcondition that the result contains neither
# '<' nor '>'.
#
# NOTE: `c == '"' or c == "'" and tag` is evaluated as
# `c == '"' or (c == "'" and tag)`. A double quote therefore toggles
# `quote` even outside of a tag and is never copied to `out`:
# remove_html_markup('"foo"') returns 'foo'. The notebook uses exactly this
# behavior as its running failure, so the code is left unchanged.
#
# NOTE: write_source() stores this function's text, as returned by
# inspect.getsource(), in the file that is committed to git. That text
# starts at the `def` line, so these leading comments stay out of the
# committed file; everything from `def` onward must be kept exactly as is.
def remove_html_markup(s):  # type: ignore
    tag = False
    quote = False
    out = ""

    for c in s:
        if c == '<' and not quote:
            tag = True
        elif c == '>' and not quote:
            tag = False
        elif c == '"' or c == "'" and tag:
            quote = not quote
        elif not tag:
            out = out + c

    # postcondition
    assert '<' not in out and '>' not in out

    return out

write_source(remove_html_markup)

!git commit -m "Eighth version (with proper assertion)" remove_html_markup.py

[master cd4a103] Eighth version (with proper assertion)
 1 file changed, 4 insertions(+), 1 deletion(-)

remove_html_markup('"foo"')

'foo'

with ExpectError():
    assert remove_html_markup('"foo"') == '"foo"'

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_12520/1036061906.py", line 2, in <module>
    assert remove_html_markup('"foo"') == '"foo"'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError (expected)

git log

!git log

commit cd4a103e9700a40685a9fb69e72bcd72aeb4e377 (HEAD -> master)
Author: Demo User <demo-user@example.com>
Date:   Mon Jan 20 10:56:58 2025 +0100

    Eighth version (with proper assertion)

commit 5e6c21f3aaf01d349f034d8423765195570b7d25
Author: Demo User <demo-user@example.com>
Date:   Mon Jan 20 10:56:57 2025 +0100

    Seventh version (fixed)

commit a57602d623dc42202eee9f46e0da4f1a1179af62
Author: Demo User <demo-user@example.com>
Date:   Mon Jan 20 10:56:57 2025 +0100

    Sixth version (with another assert)

commit 41d36c5be5385bbd53fdd2cfd0b3b63128a16d8a
Author: Demo User <demo-user@example.com>
Date:   Mon Jan 20 10:56:57 2025 +0100

    Fifth version (with assert)

commit ee41cc005a1952a6154893fd4724ef8dfe4cf834
Author: Demo User <demo-user@example.com>
Date:   Mon Jan 20 10:56:57 2025 +0100

    Fourth version (clueless)

commit c507f4a2329f26357451312ba2a937f4c812af5a
Author: Demo User <demo-user@example.com>
Date:   Mon Jan 20 10:56:57 2025 +0100

    Third version (with debugging output)

commit a9a4bcb5fb61b4189d5de8a08fa5f01ed6874160
Author: Demo User <demo-user@example.com>
Date:   Mon Jan 20 10:56:56 2025 +0100

    Second version

commit a6a2ca5af702ea2eb21f2e1bb0dcafbe85597792
Author: Demo User <demo-user@example.com>
Date:   Mon Jan 20 10:56:56 2025 +0100

    First version

def get_output(command: List[str]) -> str:
    """Run `command` and return what it wrote to standard output, as text.

    `command` is the program to run followed by its arguments.
    The exit status is not checked, and standard error is not captured.
    """
    return subprocess.run(
        command,
        universal_newlines=True,
        stdout=subprocess.PIPE,
    ).stdout

log = get_output(['git', 'log', '--pretty=oneline'])
print(log)

cd4a103e9700a40685a9fb69e72bcd72aeb4e377 Eighth version (with proper assertion)
5e6c21f3aaf01d349f034d8423765195570b7d25 Seventh version (fixed)
a57602d623dc42202eee9f46e0da4f1a1179af62 Sixth version (with another assert)
41d36c5be5385bbd53fdd2cfd0b3b63128a16d8a Fifth version (with assert)
ee41cc005a1952a6154893fd4724ef8dfe4cf834 Fourth version (clueless)
c507f4a2329f26357451312ba2a937f4c812af5a Third version (with debugging output)
a9a4bcb5fb61b4189d5de8a08fa5f01ed6874160 Second version
a6a2ca5af702ea2eb21f2e1bb0dcafbe85597792 First version

# Each non-empty log line starts with the commit hash; keep just that.
# The log lists the newest commit first, so reverse the list to have
# versions[0] be the first (oldest) version and versions[-1] the latest.
versions = [line.split()[0] for line in log.split('\n') if line]
versions.reverse()
versions[0]

'a6a2ca5af702ea2eb21f2e1bb0dcafbe85597792'

!git checkout {versions[0]}

HEAD is now at a6a2ca5 First version

print_file('remove_html_markup.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    out = ""

    for c in s:
        if c == '<':    # start of markup
            tag = True
        elif c == '>':  # end of markup
            tag = False
        elif not tag:
            out = out + c

    return out

exec(open('remove_html_markup.py').read())

remove_html_markup('"foo"')

'"foo"'

!git checkout {versions[7]}

Previous HEAD position was a6a2ca5 First version
HEAD is now at cd4a103 Eighth version (with proper assertion)

print_file('remove_html_markup.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    quote = False
    out = ""

    for c in s:
        if c == '<' and not quote:
            tag = True
        elif c == '>' and not quote:
            tag = False
        elif c == '"' or c == "'" and tag:
            quote = not quote
        elif not tag:
            out = out + c

    # postcondition
    assert '<' not in out and '>' not in out

    return out

exec(open('remove_html_markup.py').read())

remove_html_markup('"foo"')

'foo'

!git bisect start

status: waiting for both good and bad commits

!git bisect good {versions[0]}

status: waiting for bad commit, 1 good commit known

!git bisect bad {versions[7]}

Bisecting: 3 revisions left to test after this (roughly 2 steps)
[ee41cc005a1952a6154893fd4724ef8dfe4cf834] Fourth version (clueless)

# ignore
display_versions({1: PASS, 4: UNRESOLVED, 8: FAIL})

print_file('remove_html_markup.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    quote = False
    out = ""

    for c in s:
        if c == '<':  # and not quote:
            tag = True
        elif c == '>':  # and not quote:
            tag = False
        elif c == '"' or c == "'" and tag:
            quote = not quote
        elif not tag:
            out = out + c

    return out

git bisect good

exec(open('remove_html_markup.py').read())

remove_html_markup('"foo"')

'foo'

!git bisect bad

Bisecting: 0 revisions left to test after this (roughly 1 step)
[c507f4a2329f26357451312ba2a937f4c812af5a] Third version (with debugging output)

# ignore
display_versions({1: PASS, 3: UNRESOLVED, 4: FAIL, 8: FAIL})

print_file('remove_html_markup.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    quote = False
    out = ""

    for c in s:
        print("c =", repr(c), "tag =", tag, "quote =", quote)

        if c == '<' and not quote:
            tag = True
        elif c == '>' and not quote:
            tag = False
        elif c == '"' or c == "'" and tag:
            quote = not quote
        elif not tag:
            out = out + c

    return out

exec(open('remove_html_markup.py').read())

remove_html_markup('"foo"')

c = '"' tag = False quote = False
c = 'f' tag = False quote = True
c = 'o' tag = False quote = True
c = 'o' tag = False quote = True
c = '"' tag = False quote = True

'foo'

!git bisect bad

Bisecting: 0 revisions left to test after this (roughly 0 steps)
[a9a4bcb5fb61b4189d5de8a08fa5f01ed6874160] Second version

# ignore
display_versions({1: PASS, 2: UNRESOLVED, 3: FAIL, 4: FAIL, 8: FAIL})

print_file('remove_html_markup.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    quote = False
    out = ""

    for c in s:
        if c == '<' and not quote:
            tag = True
        elif c == '>' and not quote:
            tag = False
        elif c == '"' or c == "'" and tag:
            quote = not quote
        elif not tag:
            out = out + c

    return out

exec(open('remove_html_markup.py').read())

remove_html_markup('"foo"')

'foo'

# ignore
display_versions({1: PASS, 2: FAIL, 3: FAIL, 4: FAIL, 8: FAIL})

!git bisect bad

a9a4bcb5fb61b4189d5de8a08fa5f01ed6874160 is the first bad commit
commit a9a4bcb5fb61b4189d5de8a08fa5f01ed6874160 (HEAD)
Author: Demo User <demo-user@example.com>
Date:   Mon Jan 20 10:56:56 2025 +0100

    Second version

 remove_html_markup.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

!git diff HEAD^

diff --git a/remove_html_markup.py b/remove_html_markup.py
index 4999dc0..768bae9 100644
--- a/remove_html_markup.py
+++ b/remove_html_markup.py
@@ -1,12 +1,15 @@
 def remove_html_markup(s):  # type: ignore
     tag = False
+    quote = False
     out = ""
 
     for c in s:
-        if c == '<':    # start of markup
+        if c == '<' and not quote:
             tag = True
-        elif c == '>':  # end of markup
+        elif c == '>' and not quote:
             tag = False
+        elif c == '"' or c == "'" and tag:
+            quote = not quote
         elif not tag:
             out = out + c

!git bisect reset

Previous HEAD position was a9a4bcb Second version
HEAD is now at cd4a103 Eighth version (with proper assertion)

# ignore
# Write the test script that `git bisect run` executes on each version.
# Its exit status classifies the checked-out version:
# 0 = good (passes), 1 = bad (fails), 125 = unresolved (cannot be tested).
#
# The `with` block guarantees that the file is flushed and closed before
# the script is run as a subprocess. The backslash after the opening
# quotes keeps the shebang on the very first line of the file.
with open('test.py', 'w') as test_script:
    test_script.write('''\
#!/usr/bin/env python

from remove_html_markup import remove_html_markup
import sys

result = remove_html_markup('"foo"')
if result == '"foo"':
    sys.exit(0)  # good/pass
elif result == 'foo':
    sys.exit(1)  # bad/fail
else:
    sys.exit(125)  # unresolved
''')

print_file('test.py')

#!/usr/bin/env python

from remove_html_markup import remove_html_markup
import sys

result = remove_html_markup('"foo"')
if result == '"foo"':
    sys.exit(0)  # good/pass
elif result == 'foo':
    sys.exit(1)  # bad/fail
else:
    sys.exit(125)  # unresolved

!python ./test.py; echo $?

1

!git bisect start

status: waiting for both good and bad commits

!git bisect good {versions[0]}

status: waiting for bad commit, 1 good commit known

!git bisect bad {versions[7]}

Bisecting: 3 revisions left to test after this (roughly 2 steps)
[ee41cc005a1952a6154893fd4724ef8dfe4cf834] Fourth version (clueless)

!git bisect run python test.py

running 'python' 'test.py'

Bisecting: 0 revisions left to test after this (roughly 1 step)
[c507f4a2329f26357451312ba2a937f4c812af5a] Third version (with debugging output)
running 'python' 'test.py'
c = '"' tag = False quote = False
c = 'f' tag = False quote = True
c = 'o' tag = False quote = True
c = 'o' tag = False quote = True
c = '"' tag = False quote = True
Bisecting: 0 revisions left to test after this (roughly 0 steps)
[a9a4bcb5fb61b4189d5de8a08fa5f01ed6874160] Second version
running 'python' 'test.py'

a9a4bcb5fb61b4189d5de8a08fa5f01ed6874160 is the first bad commit
commit a9a4bcb5fb61b4189d5de8a08fa5f01ed6874160
Author: Demo User <demo-user@example.com>
Date:   Mon Jan 20 10:56:56 2025 +0100

    Second version

 remove_html_markup.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)
bisect found first bad commit

!git diff HEAD^

diff --git a/remove_html_markup.py b/remove_html_markup.py
index 4999dc0..768bae9 100644
--- a/remove_html_markup.py
+++ b/remove_html_markup.py
@@ -1,12 +1,15 @@
 def remove_html_markup(s):  # type: ignore
     tag = False
+    quote = False
     out = ""
 
     for c in s:
-        if c == '<':    # start of markup
+        if c == '<' and not quote:
             tag = True
-        elif c == '>':  # end of markup
+        elif c == '>' and not quote:
             tag = False
+        elif c == '"' or c == "'" and tag:
+            quote = not quote
         elif not tag:
             out = out + c

!git bisect reset

Previous HEAD position was a9a4bcb Second version
HEAD is now at cd4a103 Eighth version (with proper assertion)

version_1 = get_output(['git', 'show',
                        f'{versions[0]}:remove_html_markup.py'])

print_content(version_1, '.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    out = ""

    for c in s:
        if c == '<':    # start of markup
            tag = True
        elif c == '>':  # end of markup
            tag = False
        elif not tag:
            out = out + c

    return out

version_2 = get_output(['git', 'show',
                        f'{versions[1]}:remove_html_markup.py'])

print_content(version_2, '.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    quote = False
    out = ""

    for c in s:
        if c == '<' and not quote:
            tag = True
        elif c == '>' and not quote:
            tag = False
        elif c == '"' or c == "'" and tag:
            quote = not quote
        elif not tag:
            out = out + c

    return out

!git diff {versions[0]} {versions[1]}

diff --git a/remove_html_markup.py b/remove_html_markup.py
index 4999dc0..768bae9 100644
--- a/remove_html_markup.py
+++ b/remove_html_markup.py
@@ -1,12 +1,15 @@
 def remove_html_markup(s):  # type: ignore
     tag = False
+    quote = False
     out = ""
 
     for c in s:
-        if c == '<':    # start of markup
+        if c == '<' and not quote:
             tag = True
-        elif c == '>':  # end of markup
+        elif c == '>' and not quote:
             tag = False
+        elif c == '"' or c == "'" and tag:
+            quote = not quote
         elif not tag:
             out = out + c

def diff(s1: str, s2: str, mode: str = 'lines') -> List[patch_obj]:
    """Compute the differences between `s1` and `s2` as a list of patches.

    `mode` selects the granularity of the comparison:
    * `'lines'` (default) - compare line by line, like `diff`
    * `'chars'` - compare character by character

    Raises `ValueError` for any other `mode`.
    """
    # Callers sometimes hand in bytes rather than strings.
    # Decode these conservatively: latin1 accepts any byte value.
    old_text = s1 if isinstance(s1, str) else str(s1, 'latin1')
    new_text = s2 if isinstance(s2, str) else str(s2, 'latin1')

    dmp = diff_match_patch()

    if mode == 'chars':
        char_diffs = dmp.diff_main(old_text, new_text)
        return dmp.patch_make(old_text, char_diffs)

    if mode == 'lines':
        # Line mode: map the texts to a per-line encoding, diff the encoded
        # texts, and expand the resulting diffs back to the original lines.
        old_chars, new_chars, line_array = \
            dmp.diff_linesToChars(old_text, new_text)
        line_diffs = dmp.diff_main(old_chars, new_chars)
        dmp.diff_charsToLines(line_diffs, line_array)
        return dmp.patch_make(line_diffs)

    raise ValueError("mode must be 'lines' or 'chars'")

patches = diff(version_1, version_2)
patches

[<diff_match_patch.diff_match_patch.patch_obj at 0x1212f5790>,
 <diff_match_patch.diff_match_patch.patch_obj at 0x1212f52b0>,
 <diff_match_patch.diff_match_patch.patch_obj at 0x1212f7020>,
 <diff_match_patch.diff_match_patch.patch_obj at 0x1212f67b0>]

def patch_string(p: patch_obj) -> str:
    """Return patch `p` as readable text, with percent-escapes decoded."""
    escaped = str(p).strip()
    return urllib.parse.unquote(escaped)  # type: ignore

def print_patch(p: patch_obj) -> None:
    """Print patch `p` in readable form, followed by an empty line."""
    readable = patch_string(p)
    print_content(readable, '.py')
    print()

for p in patches:
    print_patch(p)

@@ -48,24 +48,42 @@
 tag = False

+    quote = False

     out = ""

@@ -104,50 +104,43 @@
  s:

-        if c == '<':    # start of markup

+        if c == '<' and not quote:
@@ -162,48 +162,45 @@
 rue

-        elif c == '>':  # end of markup

+        elif c == '>' and not quote:
@@ -215,24 +215,97 @@
 tag = False

+        elif c == '"' or c == "'" and tag:
            quote = not quote

         elif

def patch(text: str, patches: List[patch_obj]) -> str:
    """Return `text` with all of `patches` applied.

    Fails with an `AssertionError` if at least one patch
    could not be applied.
    """
    new_text, applied = diff_match_patch().patch_apply(patches, text)
    assert all(applied), "Could not apply some patch(es)"
    return new_text

print_content(patch(version_1, patches), '.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    quote = False
    out = ""

    for c in s:
        if c == '<' and not quote:
            tag = True
        elif c == '>' and not quote:
            tag = False
        elif c == '"' or c == "'" and tag:
            quote = not quote
        elif not tag:
            out = out + c

    return out

assert patch(version_1, patches) == version_2

assert patch(version_1, []) == version_1

print(patch_string(patches[0]))

@@ -48,24 +48,42 @@
 tag = False

+    quote = False

     out = ""

print_content(patch(version_1, [patches[0]]), '.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    quote = False
    out = ""

    for c in s:
        if c == '<':    # start of markup
            tag = True
        elif c == '>':  # end of markup
            tag = False
        elif not tag:
            out = out + c

    return out

print_content(patch(version_1, [patches[1]]), '.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    out = ""

    for c in s:
        if c == '<' and not quote:
            tag = True
        elif c == '>':  # end of markup
            tag = False
        elif not tag:
            out = out + c

    return out

quiz("What has changed in version 1 after applying the second patch?",
     [
         "The initialization of quote is deleted",
         "The condition after `if c == '<'` is expanded",
         "The tag variable gets a different value",
         "None of the above"
     ], '1 / 1 + 1 ** 1 - 1 % 1 * 1')

def test_remove_html_markup_patches(patches: List[patch_obj]) -> None:
    """Apply `patches` to `version_1` and test the resulting code.

    The patched source is executed in the global namespace, which
    (re)defines `remove_html_markup()` there. Raises `AssertionError`
    if the patched function does not return `'"foo"'` unchanged.
    """
    new_version = patch(version_1, patches)
    # Redefine remove_html_markup() globally *before* calling it below
    exec(new_version, globals())
    assert remove_html_markup('"foo"') == '"foo"'

test_remove_html_markup_patches([])

with ExpectError(AssertionError):
    test_remove_html_markup_patches(patches)

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_12520/382405065.py", line 2, in <module>
    test_remove_html_markup_patches(patches)
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_12520/1244207729.py", line 4, in test_remove_html_markup_patches
    assert remove_html_markup('"foo"') == '"foo"'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError (expected)

with DeltaDebugger() as dd:
    test_remove_html_markup_patches(patches)

reduced_patches = dd.min_args()['patches']

for p in reduced_patches:
    print_patch(p)

@@ -48,24 +48,42 @@
 tag = False

+    quote = False

     out = ""

@@ -215,24 +215,97 @@
 tag = False

+        elif c == '"' or c == "'" and tag:
            quote = not quote

         elif

print_content(patch(version_1, reduced_patches), '.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    quote = False
    out = ""

    for c in s:
        if c == '<':    # start of markup
            tag = True
        elif c == '>':  # end of markup
            tag = False
        elif c == '"' or c == "'" and tag:
            quote = not quote
        elif not tag:
            out = out + c

    return out

pass_patches, fail_patches, diffs = \
    tuple(arg['patches'] for arg in dd.min_arg_diff())

print_content(patch(version_1, pass_patches), '.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    quote = False
    out = ""

    for c in s:
        if c == '<' and not quote:
            tag = True
        elif c == '>':  # end of markup
            tag = False
        elif not tag:
            out = out + c

    return out

print_content(patch(version_1, fail_patches), '.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    quote = False
    out = ""

    for c in s:
        if c == '<' and not quote:
            tag = True
        elif c == '>':  # end of markup
            tag = False
        elif c == '"' or c == "'" and tag:
            quote = not quote
        elif not tag:
            out = out + c

    return out

for p in diffs:
    print_patch(p)

@@ -215,24 +215,97 @@
 tag = False

+        elif c == '"' or c == "'" and tag:
            quote = not quote

         elif

class ChangeDebugger(CallCollector):
    """Holds a passing and a failing version of a source,
    together with the patches that lead from one to the other."""

    def __init__(self, pass_source: str, fail_source: str, **ddargs: Any) -> None:
        """Constructor.
        `pass_source` is the source code of the passing version;
        `fail_source` is the source code of the failing version.
        Further keyword arguments are kept for the `DeltaDebugger`
        constructor; a `log` argument, if given, also sets `self.log`.
        """
        super().__init__()
        self._ddargs = ddargs
        self.log = ddargs.get('log', False)
        self._pass_source = pass_source
        self._fail_source = fail_source
        self._patches = diff(pass_source, fail_source)

    def pass_source(self) -> str:
        """Return the source code of the passing version."""
        return self._pass_source

    def fail_source(self) -> str:
        """Return the source code of the failing version."""
        return self._fail_source

    def patches(self) -> List[patch_obj]:
        """Return the patches turning the passing source into the failing one."""
        return self._patches

def test_remove_html_markup() -> None:
    """Pass iff `remove_html_markup()` returns the quoted string `"foo"` unchanged."""
    assert remove_html_markup('"foo"') == '"foo"'

with ChangeDebugger(version_1, version_2) as cd:
    test_remove_html_markup()

with ExpectError(AssertionError):
    cd.call()

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_12520/3359775007.py", line 2, in <module>
    cd.call()
  File "/Users/zeller/Projects/debuggingbook/notebooks/DeltaDebugger.ipynb", line 214, in call
    return self.function()(**args)
           ^^^^^^^^^^^^^^^^^^^^^^^
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_12520/3114633459.py", line 2, in test_remove_html_markup
    assert remove_html_markup('"foo"') == '"foo"'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError (expected)

print_content(cd.pass_source(), '.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    out = ""

    for c in s:
        if c == '<':    # start of markup
            tag = True
        elif c == '>':  # end of markup
            tag = False
        elif not tag:
            out = out + c

    return out

print_content(cd.fail_source(), '.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    quote = False
    out = ""

    for c in s:
        if c == '<' and not quote:
            tag = True
        elif c == '>' and not quote:
            tag = False
        elif c == '"' or c == "'" and tag:
            quote = not quote
        elif not tag:
            out = out + c

    return out

cd.patches()

[<diff_match_patch.diff_match_patch.patch_obj at 0x121404ec0>,
 <diff_match_patch.diff_match_patch.patch_obj at 0x1214049e0>,
 <diff_match_patch.diff_match_patch.patch_obj at 0x121405c70>,
 <diff_match_patch.diff_match_patch.patch_obj at 0x121404e30>]

class ChangeDebugger(ChangeDebugger):
    def test_patches(self, patches: List[patch_obj]) -> None:
        """Apply `patches` to the passing source and repeat the collected call.

        The patched source is executed in the global namespace, so its
        definitions replace the current ones before the call is made.
        Any exception raised by the call propagates to the caller.
        """
        patched_source = patch(self.pass_source(), patches)
        exec(patched_source, globals())
        self.call()

class ChangeDebugger(ChangeDebugger):
    def __enter__(self) -> Any:
        """Called when a `with` block begins.

        Executes the failing source in the global namespace first,
        then hands over to the superclass.
        """
        failing_source = self.fail_source()
        exec(failing_source, globals())
        return super().__enter__()

with ChangeDebugger(version_1, version_2) as cd:
    test_remove_html_markup()

cd.test_patches([])

with ExpectError(AssertionError):
    cd.test_patches(cd.patches())

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_12520/3279782181.py", line 2, in <module>
    cd.test_patches(cd.patches())
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_12520/1738176692.py", line 5, in test_patches
    self.call()
  File "/Users/zeller/Projects/debuggingbook/notebooks/DeltaDebugger.ipynb", line 214, in call
    return self.function()(**args)
           ^^^^^^^^^^^^^^^^^^^^^^^
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_12520/3114633459.py", line 2, in test_remove_html_markup
    assert remove_html_markup('"foo"') == '"foo"'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError (expected)

class ChangeDebugger(ChangeDebugger):
    def min_patches(self) -> Tuple[List[patch_obj], List[patch_obj], List[patch_obj]]:
        """
        Determine a minimal difference between passing and failing patch sets.

        Returns a triple (`pass_patches`, `fail_patches`, `diff_patches`):
        * `pass_patches` - patches which, when applied, make the test pass;
        * `fail_patches` - patches which, when applied, make the test fail;
        * `diff_patches` - the minimal difference between these two sets.
        """
        # Fetch the patches *before* entering the block, so that
        # test_patches() is the only call made inside it.
        patches = self.patches()
        with DeltaDebugger(**self._ddargs) as dd:
            self.test_patches(patches)

        # Each element of the result maps argument names to values
        minimized = dd.min_arg_diff()
        return (minimized[0]['patches'],
                minimized[1]['patches'],
                minimized[2]['patches'])

class ChangeDebugger(ChangeDebugger):
    def __repr__(self) -> str:
        """Return the minimal difference in patches as readable text.

        Note that this calls `min_patches()` every time it is invoked.
        """
        _, _, diff_patches = self.min_patches()
        return "".join(map(patch_string, diff_patches))

with ChangeDebugger(version_1, version_2) as cd:
    test_remove_html_markup()

cd.patches()

[<diff_match_patch.diff_match_patch.patch_obj at 0x1214ef110>,
 <diff_match_patch.diff_match_patch.patch_obj at 0x1214ef9b0>,
 <diff_match_patch.diff_match_patch.patch_obj at 0x1214ee930>,
 <diff_match_patch.diff_match_patch.patch_obj at 0x1214ef050>]

pass_patches, fail_patches, diffs = cd.min_patches()
diffs

[<diff_match_patch.diff_match_patch.patch_obj at 0x1214ef050>]

print(patch_string(diffs[0]))

@@ -215,24 +215,97 @@
 tag = False

+        elif c == '"' or c == "'" and tag:
            quote = not quote

         elif

cd

@@ -215,24 +215,97 @@
 tag = False

+        elif c == '"' or c == "'" and tag:
            quote = not quote

         elif

version_8 = get_output(['git', 'show', 
                            f'{versions[7]}:remove_html_markup.py'])

with ChangeDebugger(version_1, version_8) as cd:
    test_remove_html_markup()

len(cd.patches())

5

cd

@@ -215,24 +215,97 @@
 tag = False

+        elif c == '"' or c == "'" and tag:
            quote = not quote

         elif

class NotPassingError(ValueError):
    """Raised when the call raises an exception even in its passing version."""

class ChangeDebugger(ChangeDebugger):
    def after_collection(self) -> None:
        """Diagnostics: check that the observed call suits change debugging.

        Raises
        * `NoCallError` if no function call was observed;
        * `NotFailingError` if the observed call did not raise an exception,
          or does not raise one with all patches applied;
        * `NotPassingError` if the call raises an exception
          with no patches applied.
        """
        if self.function() is None:
            raise NoCallError("No function call observed")
        if self.exception() is None:
            raise NotFailingError(f"{self.format_call()} did not raise an exception")

        # The passing version (no patches applied) must not raise
        try:
            self.test_patches([])
        except Exception as exc:
            raise NotPassingError(f"{self.format_call()} raised an exception in its passing version") from exc

        # The failing version (all patches applied) must raise.
        # The error is raised in the `else` branch: raising it inside the
        # `try` would have it swallowed by the `except` clause right away.
        try:
            self.test_patches(self.patches())
        except Exception:
            pass  # expected
        else:
            raise NotFailingError(f"{self.format_call()} did not raise an exception in failing version")

        if self.log:
            print(f"Observed {self.format_call()}" +
                  f" raising {self.format_exception(self.exception())}")

with ExpectError(NotPassingError):
    with ChangeDebugger(version_1, version_2) as cd:
        test_remove_html_markup()

with ChangeDebugger(source_pass, source_fail) as cd:
    test()
cd

# ignore
source_pass = version_1
source_fail = version_2

print_content(source_pass, '.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    out = ""

    for c in s:
        if c == '<':    # start of markup
            tag = True
        elif c == '>':  # end of markup
            tag = False
        elif not tag:
            out = out + c

    return out

def test() -> None:
    """Pass iff `remove_html_markup()` returns the quoted string `"foo"` unchanged."""
    assert remove_html_markup('"foo"') == '"foo"'

exec(source_pass)
test()

print_content(source_fail, '.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    quote = False
    out = ""

    for c in s:
        if c == '<' and not quote:
            tag = True
        elif c == '>' and not quote:
            tag = False
        elif c == '"' or c == "'" and tag:
            quote = not quote
        elif not tag:
            out = out + c

    return out

exec(source_fail)
with ExpectError(AssertionError):
    test()

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_12520/4262003862.py", line 3, in <module>
    test()
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_12520/3045937450.py", line 2, in test
    assert remove_html_markup('"foo"') == '"foo"'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError (expected)

with ChangeDebugger(source_pass, source_fail) as cd:
    test()
cd

@@ -215,24 +215,97 @@
 tag = False

+        elif c == '"' or c == "'" and tag:
            quote = not quote

         elif

pass_patches, fail_patches, diffs = cd.min_patches()

for p in pass_patches:
    print_patch(p)

@@ -48,24 +48,42 @@
 tag = False

+    quote = False

     out = ""
@@ -104,50 +104,43 @@
  s:

-        if c == '<':    # start of markup

+        if c == '<' and not quote:

for p in diffs:
    print_patch(p)

@@ -215,24 +215,97 @@
 tag = False

+        elif c == '"' or c == "'" and tag:
            quote = not quote

         elif

# ignore
from ClassDiagram import display_class_hierarchy

# ignore
display_class_hierarchy([ChangeDebugger],
                        public_methods=[
                            CallCollector.__init__,
                            CallCollector.__enter__,
                            CallCollector.__exit__,
                            CallCollector.call,  # type: ignore
                            CallCollector.args,
                            CallCollector.function,
                            CallCollector.exception,
                            ChangeDebugger.__init__,
                            ChangeDebugger.min_patches,
                            ChangeDebugger.patches,
                            ChangeDebugger.pass_source,
                            ChangeDebugger.fail_source,
                            ChangeDebugger.__repr__,
                            ChangeDebugger.__enter__
                        ],
                        project='debuggingbook')

print_content(patch(source_pass, diffs), '.py')

def remove_html_markup(s):  # type: ignore
    tag = False
    out = ""

    for c in s:
        if c == '<':    # start of markup
            tag = True
        elif c == '>':  # end of markup
            tag = False
        elif c == '"' or c == "'" and tag:
            quote = not quote
        elif not tag:
            out = out + c

    return out

for p in diff(source_pass, source_fail):
    print_patch(p)

@@ -48,24 +48,42 @@
 tag = False

+    quote = False

     out = ""
@@ -104,50 +104,43 @@
  s:

-        if c == '<':    # start of markup

+        if c == '<' and not quote:
@@ -162,48 +162,45 @@
 rue

-        elif c == '>':  # end of markup

+        elif c == '>' and not quote:
@@ -215,24 +215,97 @@
 tag = False

+        elif c == '"' or c == "'" and tag:
            quote = not quote

         elif

# Clean up: remove the project directory; a missing directory is ignored.
# NOTE(review): PROJECT is a relative path, and the working directory was
# changed into PROJECT during setup. Unless the working directory has been
# restored in between, the path does not resolve from here, and the
# resulting FileNotFoundError is silently ignored, leaving the directory
# in place. Confirm where this runs.
try:
    shutil.rmtree(PROJECT)
except FileNotFoundError:
    pass

Isolating Failure-Inducing Changes

Changes and Bugs

Leveraging Version Histories

An Example Version History

Create a Working Directory

Initialize Git

Accessing Versions

Manual Bisecting

Automatic Bisecting

Computing and Applying Patches

Quiz

Delta Debugging on Patches

A Minimal Set of Patches

A Minimal Difference

A ChangeDebugger class

Synopsis

High-Level Interface

Programmatic Interface

Supporting Functions

Lessons Learned

Next Steps

Background

Exercises

Exercise 1: Fine-Grained Changes

Exercise 2: Failure-Inducing Changes in the Large

Exercise 3: Hierarchical Change Debugging