In working on the new data-driven animations, I needed to ensure that the C++ code was reading and writing the animation data correctly. Because I like the format, I am using YAML for the data files (though JSON is also supported by the C++ code). I felt it was easier to just read and then write the data, and compare the two to ensure the correct behavior, rather than trying to view all of the data in the debugger. For this, a regular diff is not ideal, as the order of the keys does not affect the meaning of the data. So I looked into a few online utilities to compare the YAML (e.g. here, here), but I wanted one that worked offline. For this, I found a few utilities:
So I found a tool I liked. Problem solved, right? So why the blog post?
Because dyff still left a few things to be desired:
So, I wrote a simple Python script using ruamel.yaml to produce a similar output to dyff with my additions. I felt that this was simpler than trying to learn and build the Go source for dyff. Below are the results of my script.
It was certainly a fun afternoon project. I got to try out printing colors to the terminal for the first time (at least in recent memory). This was an excellent introduction to that for Python, which I appreciated. I went with the termcolor library for my code, as it resets the color at the end automatically, while the other options leave it with the color until you explicitly reset it. I also got to try writing a reasonable difference algorithm, which was a pleasant (though fairly straightforward) challenge.
Below is the code, MIT licensed, if you want to use it:
import os
import sys
from ruamel.yaml import YAML
from ruamel.yaml.compat import StringIO
from collections.abc import Mapping, Sequence, Collection
from numbers import Number
import itertools
import argparse
def isclose(a, b, relative=1e-4, absolute=1e-5):
    """Return True when ``a`` and ``b`` are approximately equal.

    The permitted gap is the larger of ``absolute`` and ``relative`` scaled
    by the bigger of the two magnitudes.
    """
    largest = max(abs(a), abs(b))
    tolerance = max(relative * largest, absolute)
    gap = abs(a - b)
    return gap <= tolerance
class MyYAML(YAML):
    """YAML subclass whose ``dump`` can hand the output back as a string."""

    def dump(self, data, stream=None, **kw):
        """Dump ``data`` to ``stream``, or return the text when no stream is given.

        With an explicit ``stream`` the output is written there and ``None``
        is returned. Without one, the output is collected in a ``StringIO``
        and returned; if the stripped text ends with a ``...`` line, that
        line is removed and the stripped text is returned instead.
        """
        if stream is not None:
            YAML.dump(self, data, stream, **kw)
            return None

        buffer = StringIO()
        YAML.dump(self, data, buffer, **kw)
        text = buffer.getvalue()
        trimmed = text.strip()
        if trimmed.endswith('\n...'):
            # Drop the four characters of the trailing "\n..." marker.
            text = trimmed[:-4]
        return text
def flow_vectors(data, rounding=6):
    """Round floats in place and mark short numeric lists as flow style.

    Walks ``data`` recursively. Float values found directly in mappings and
    sequences are replaced by ``round(value, rounding)``. A non-empty
    sequence of at most four numbers gets ``data.fa.set_flow_style()``
    called on it; any other sequence is descended into.

    Args:
        data: Nested mappings / sequences / scalars; modified in place.
        rounding: Digit count passed to ``round``; ``None`` disables rounding.
            The value is forwarded to every nested call.
    """
    if isinstance(data, Mapping):
        for key in data:
            value = data[key]
            if rounding is not None and isinstance(value, float):
                value = round(value, rounding)
                data[key] = value
            # Forward ``rounding`` so nested values honour the caller's choice
            # instead of silently falling back to the default.
            flow_vectors(value, rounding)
    elif isinstance(data, Sequence) and not isinstance(data, str):
        if rounding is not None:
            for index, value in enumerate(data):
                if isinstance(value, float):
                    data[index] = round(value, rounding)
        if 0 < len(data) <= 4 and all(isinstance(v, Number) for v in data):
            try:
                data.fa.set_flow_style()
            except AttributeError:
                # Sequence without a ``fa`` format attribute (e.g. a plain
                # list): report it and carry on.
                print(data)
        else:
            for value in data:
                flow_vectors(value, rounding)
def reflow(data, rounding=6):
    """Round floats in place and reset block/flow style throughout ``data``.

    Mappings get ``data.fa.set_block_style()``. A non-empty sequence of at
    most four numbers gets ``data.fa.set_flow_style()``; every other
    sequence gets ``data.fa.set_block_style()`` and is descended into.
    Float values found directly in mappings and sequences are replaced by
    ``round(value, rounding)``.

    Args:
        data: Nested mappings / sequences / scalars; modified in place.
        rounding: Digit count passed to ``round``; ``None`` disables rounding.
            The value is forwarded to every nested call.
    """
    if isinstance(data, Mapping):
        try:
            data.fa.set_block_style()
        except AttributeError:
            # Mapping without a ``fa`` format attribute (e.g. a plain dict):
            # report it and carry on.
            print(data)
        for key in data:
            value = data[key]
            if rounding is not None and isinstance(value, float):
                value = round(value, rounding)
                data[key] = value
            # Forward ``rounding`` so nested values honour the caller's choice
            # instead of silently falling back to the default.
            reflow(value, rounding)
    elif isinstance(data, Sequence) and not isinstance(data, str):
        if rounding is not None:
            for index, value in enumerate(data):
                if isinstance(value, float):
                    data[index] = round(value, rounding)
        if 0 < len(data) <= 4 and all(isinstance(v, Number) for v in data):
            try:
                data.fa.set_flow_style()
            except AttributeError:
                print(data)
        else:
            try:
                data.fa.set_block_style()
            except AttributeError:
                print(data)
            for value in data:
                reflow(value, rounding)
# Command-line interface: two positional file names plus three boolean flags.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument('fromfile', help='From side of diff')
parser.add_argument('tofile', help='To side of diff')
# Each row is (short option, long option, help text); all are on/off switches.
for _short, _long, _help in (
    ('-n', '--numbers', 'Counts similar numbers as being the same'),
    ('-e', '--empty', 'Skips empty maps and lists from added/removed entries.'),
    ('-p', '--plain', 'Removes + and - prefixes from the diff.'),
):
    parser.add_argument(_short, _long, action='store_true', help=_help)
params = parser.parse_args()
# Load both sides of the diff. ``yaml`` stays bound at module level after the
# loop because change.__repr__ uses it to dump values.
y = []
for fn in [params.fromfile, params.tofile]:
    yaml = MyYAML(typ='rt')  # default, if not specified, is 'rt' (round-trip), alternate 'safe'
    yaml.default_flow_style = True
    # Decode explicitly as UTF-8 rather than relying on the platform default.
    with open(fn, encoding='utf-8') as f:
        doc = f.read()
    # Tabs are replaced before parsing -- presumably because YAML does not
    # accept tabs as indentation.
    data = yaml.load(doc.replace('\t',' '))
    y.append(data)
fromy, toy = y
def join(base, add):
    """Append ``add`` to the dotted path ``base``.

    A falsy ``base`` (such as the empty root path) yields just ``add``
    formatted as a string; otherwise the two are joined with a dot.
    """
    prefix = f'{base}.' if base else ''
    return f'{prefix}{add}'
class change(object):
    """One reported difference between the two documents.

    Instances store the five constructor arguments unchanged:
    ``path`` (dotted location, empty for the root), ``kind`` (a short marker
    such as '+', '-' or '='), ``message`` (description), and the ``fr`` /
    ``to`` values, either of which may be ``None``.
    """

    def __init__(self, path, kind, message, fr, to):
        self.path = path
        self.kind = kind
        self.message = message
        self.fr = fr
        self.to = to

    def __repr__(self):
        """Return the coloured, multi-line terminal rendering of this change.

        Uses the module-level ``yaml`` to dump the values and ``params.plain``
        to decide whether the '- ' / '+ ' line prefixes are shown.
        """
        # Local imports: these are only needed when a change is rendered.
        from termcolor import colored
        from textwrap import indent

        title = colored(self.path or '(root level)','white',attrs=['bold'])
        summary = colored(self.kind,'blue') + ' ' + colored(self.message,'blue',attrs=['underline'])
        val = title + '\n' + summary
        if self.fr is not None:
            val += '\n' + colored(indent(yaml.dump(self.fr).strip(),' ' if params.plain else '- '),'red')
        if self.to is not None:
            val += '\n' + colored(indent(yaml.dump(self.to).strip(),' ' if params.plain else '+ '),'green')
        return val

    def __str__(self):
        """Return the same text as ``repr(self)``."""
        # ``__repr__`` is not a name in function scope, so it must be reached
        # through repr().
        return repr(self)
def _entries_for_report(source, wanted):
    """Copy ``source`` keeping only ``wanted`` keys, minus empties under --empty."""
    kept = source.copy()
    for k in [*kept.keys()]:
        value = kept[k]
        is_skipped_empty = (
            params.empty and isinstance(value, Collection) and len(value) == 0
        )
        if k not in wanted or is_skipped_empty:
            del kept[k]
    return kept


def diff(p, f, t):
    """Compare ``f`` (from) with ``t`` (to) and return a list of changes.

    Args:
        p: Dotted path of the values being compared ('' for the root).
        f: Value from the "from" document.
        t: Value from the "to" document.

    Returns:
        A list of ``change`` objects. Unchanged leaves are included with
        kind '='. Mappings are compared by key, sequences by position, and
        everything else with ``!=``.
    """
    # With --numbers, equal or nearly equal numbers count as unchanged even
    # when their types differ (e.g. int vs float).
    if params.numbers and isinstance(f, Number) and isinstance(t, Number):
        if f == t:
            return [change(p, '=', 'numbers same', f, t)]
        if isclose(f, t):
            return [change(p, '=', 'numbers close', f, t)]

    ftype, ttype = type(f), type(t)
    if ftype != ttype:
        return [change(p, '∓', f'type change from {ftype} to {ttype}', f, t)]

    if isinstance(f, Mapping):
        fkeys = set(f.keys())
        tkeys = set(t.keys())
        res = []
        addmap = _entries_for_report(t, tkeys - fkeys)
        if addmap:
            res.append(change(p, '+', 'map entries added', None, addmap))
        removemap = _entries_for_report(f, fkeys - tkeys)
        if removemap:
            res.append(change(p, '-', 'map entries removed', removemap, None))
        # Recurse into shared keys, in the order they appear in ``f``.
        for k in f.keys():
            if k in tkeys:
                res.extend(diff(join(p, k), f[k], t[k]))
        return res

    if isinstance(f, Sequence) and not isinstance(f, str):
        res = []
        if len(f) < len(t):
            res.append(change(p, '+', 'list entries added', None, t[len(f):]))
        elif len(f) > len(t):
            # Trailing entries present only on the "from" side were removed.
            res.append(change(p, '-', 'list entries removed', f[len(t):], None))
        for i, (fv, tv) in enumerate(zip(f, t)):
            res.extend(diff(join(p, i), fv, tv))
        return res

    if f != t:
        return [change(p, '∓', 'value change', f, t)]
    return [change(p, '=', 'skipped', f, t)]
from pprint import pprint
# Diff the two documents from the root, discard the '=' (unchanged) records,
# and print what remains with a blank line between changes.
d = diff('', fromy, toy)
fd = list(filter(lambda entry: entry.kind != '=', d))
print('\n\n'.join(repr(entry) for entry in fd))
PS: As an extra comparison, here's the output from meld, a GUI diff/merge tool.