From 5b057055d56e9b082cba30e037ce17d5eaf5878a Mon Sep 17 00:00:00 2001 From: ppirooznia Date: Sat, 13 Jul 2024 16:07:12 -0700 Subject: [PATCH] Added docstrings --- jsondiff/__init__.py | 411 ++++++++++++++++++++++++++++++++++++++++++- jsondiff/symbols.py | 23 +++ 2 files changed, 432 insertions(+), 2 deletions(-) diff --git a/jsondiff/__init__.py b/jsondiff/__init__.py index eb2fadd..f3f089d 100644 --- a/jsondiff/__init__.py +++ b/jsondiff/__init__.py @@ -123,26 +123,113 @@ def serialize_data(self, obj, stream): class JsonDiffSyntax: def emit_set_diff(self, a, b, s, added, removed): + """ + Emits the difference between two sets. + + :param a: The original set. + :param b: The modified set. + :param s: Similarity score between the two sets (1.0 means identical). + :param added: Elements that were added to 'b'. + :param removed: Elements that were removed from 'a'. + :raises NotImplementedError: This is an abstract method. + """ raise NotImplementedError() def emit_list_diff(self, a, b, s, inserted, changed, deleted): + """ + Emits the difference between two lists. + + :param a: The original list. + :param b: The modified list. + :param s: Similarity score between the two lists (1.0 means identical). + :param inserted: Index and value of elements inserted into 'b'. + :param changed: Index, original value, and new value of elements that have changed. + :param deleted: Index and value of elements that were deleted from 'a'. + :raises NotImplementedError: This is an abstract method. + """ raise NotImplementedError() def emit_dict_diff(self, a, b, s, added, changed, removed): + """ + Emits the difference between two dictionaries. + + :param a: The original dictionary. + :param b: The modified dictionary. + :param s: Similarity score between the two dictionaries (1.0 means identical). + :param added: Key-value pairs that were added to 'b'. + :param changed: Keys and their corresponding old and new values for items that have changed. + :param removed: Keys of items that were removed from 'a'. 
+ :raises NotImplementedError: This is an abstract method. + """ raise NotImplementedError() def emit_value_diff(self, a, b, s): + """ + Emits the difference between two values. + + :param a: The original value. + :param b: The modified value. + :param s: Similarity score between the two values (1.0 means identical). + :raises NotImplementedError: This is an abstract method. + """ raise NotImplementedError() def patch(self, a, d): + """ + Applies a patch to a JSON structure. + + :param a: The original JSON structure. + :param d: The patch to apply. + :return: The patched JSON structure. + :raises NotImplementedError: This is an abstract method. + """ raise NotImplementedError() def unpatch(self, a, d): + """ + Reverses a patch on a JSON structure. + + :param a: The patched JSON structure. + :param d: The patch that was applied. + :return: The original JSON structure before the patch was applied. + :raises NotImplementedError: This is an abstract method. + """ raise NotImplementedError() class CompactJsonDiffSyntax: + """ + Provides a compact syntax for JSON differences, focusing on minimizing the output size. + This class is designed to emit and apply differences between two JSON structures in a compact form, + making it suitable for scenarios where bandwidth or storage efficiency is critical. + + Example: + Given two JSON structures, `a` and `b`: + + a = {"name": "Alice", "age": 30, "skills": ["Python", "Django"]} + b = {"name": "Alice", "age": 31, "skills": ["Python", "Django", "Flask"]} + + The `emit_dict_diff` method would produce a compact diff like: + + { + "age": 31, + "skills": {"insert": [(2, "Flask")]} + } + + This diff can then be applied to `a` using the `patch` method to obtain `b`. + """ + def emit_set_diff(self, a, b, s, added, removed): + """ + Emits a compact representation of the difference between two sets. + + :param a: The original set. + :param b: The modified set. + :param s: Similarity score between the two sets. 
+ :param added: Elements added to the original set. + :param removed: Elements removed from the original set. + :return: A dictionary representing the changes in a compact form. + """ if s == 0.0 or len(removed) == len(a): return {replace: b} if isinstance(b, dict) else b else: @@ -154,6 +241,17 @@ def emit_set_diff(self, a, b, s, added, removed): return d def emit_list_diff(self, a, b, s, inserted, changed, deleted): + """ + Emits a compact representation of the difference between two lists. + + :param a: The original list. + :param b: The modified list. + :param s: Similarity score between the two lists. + :param inserted: Elements inserted into the original list. + :param changed: Elements changed in the original list. + :param deleted: Elements deleted from the original list. + :return: A dictionary representing the changes in a compact form. + """ if s == 0.0: return {replace: b} if isinstance(b, dict) else b elif s == 1.0 and not (inserted or changed or deleted): @@ -167,6 +265,17 @@ def emit_list_diff(self, a, b, s, inserted, changed, deleted): return d def emit_dict_diff(self, a, b, s, added, changed, removed): + """ + Emits a compact representation of the difference between two dictionaries. + + :param a: The original dictionary. + :param b: The modified dictionary. + :param s: Similarity score between the two dictionaries. + :param added: Key-value pairs added to the original dictionary. + :param changed: Key-value pairs changed in the original dictionary. + :param removed: Keys removed from the original dictionary. + :return: A dictionary representing the changes in a compact form. + """ if s == 0.0: return {replace: b} if isinstance(b, dict) else b elif s == 1.0 and not (added or changed or removed): @@ -178,12 +287,27 @@ def emit_dict_diff(self, a, b, s, added, changed, removed): return changed def emit_value_diff(self, a, b, s): + """ + Emits a compact representation of the difference between two values. + + :param a: The original value. 
+ :param b: The modified value. + :param s: Similarity score between the two values. + :return: A dictionary or value representing the change in a compact form. + """ if s == 1.0: return {} else: return {replace: b} if isinstance(b, dict) else b def patch(self, a, d): + """ + Applies a compact diff to a JSON structure to produce the modified structure. + + :param a: The original JSON structure. + :param d: The compact diff to apply. + :return: The modified JSON structure after applying the diff. + """ if isinstance(d, dict): if not d: return a @@ -231,7 +355,39 @@ def patch(self, a, d): class ExplicitJsonDiffSyntax: + """ + Provides an explicit syntax for JSON differences, focusing on clarity and readability. + This class is designed to emit and apply differences between two JSON structures in a form that is easy to understand, + making it suitable for scenarios where human readability of diffs is important. + + Example: + Given two JSON structures, `a` and `b`: + + a = {"name": "Alice", "age": 30, "skills": ["Python", "Django"]} + b = {"name": "Alice", "age": 31, "skills": ["Python", "Django", "Flask"]} + + The `emit_dict_diff` method would produce an explicit diff like: + + { + "age": 31, + "skills": {"insert": [(2, "Flask")]} + } + + Unlike the compact syntax, this explicit form prioritizes readability and ease of understanding over minimizing size. + This diff can then be applied to `a` using the `patch` method to obtain `b`. + """ + def emit_set_diff(self, a, b, s, added, removed): + """ + Emits an explicit representation of the difference between two sets. + + :param a: The original set. + :param b: The modified set. + :param s: Similarity score between the two sets. + :param added: Elements added to the original set. + :param removed: Elements removed from the original set. + :return: A dictionary representing the changes in an explicit form. 
+ """ if s == 0.0 or len(removed) == len(a): return b else: @@ -243,6 +399,17 @@ def emit_set_diff(self, a, b, s, added, removed): return d def emit_list_diff(self, a, b, s, inserted, changed, deleted): + """ + Emits an explicit representation of the difference between two lists. + + :param a: The original list. + :param b: The modified list. + :param s: Similarity score between the two lists. + :param inserted: Elements inserted into the original list. + :param changed: Elements changed in the original list. + :param deleted: Elements deleted from the original list. + :return: A dictionary representing the changes in an explicit form. + """ if s == 0.0 and not (inserted or changed or deleted): return b elif s == 1.0 and not (inserted or changed or deleted): @@ -256,6 +423,17 @@ def emit_list_diff(self, a, b, s, inserted, changed, deleted): return d def emit_dict_diff(self, a, b, s, added, changed, removed): + """ + Emits an explicit representation of the difference between two dictionaries. + + :param a: The original dictionary. + :param b: The modified dictionary. + :param s: Similarity score between the two dictionaries. + :param added: Key-value pairs added to the original dictionary. + :param changed: Key-value pairs changed in the original dictionary. + :param removed: Keys removed from the original dictionary. + :return: A dictionary representing the changes in an explicit form. + """ if s == 0.0 and not (added or changed or removed): return b elif s == 1.0 and not (added or changed or removed): @@ -271,6 +449,14 @@ def emit_dict_diff(self, a, b, s, added, changed, removed): return d def emit_value_diff(self, a, b, s): + """ + Emits an explicit representation of the difference between two values. + + :param a: The original value. + :param b: The modified value. + :param s: Similarity score between the two values. + :return: A dictionary or value representing the change in an explicit form. 
+ """ if s == 1.0: return {} else: @@ -278,7 +464,43 @@ def emit_value_diff(self, a, b, s): class SymmetricJsonDiffSyntax: + """ + Provides a symmetric syntax for JSON differences, focusing on maintaining both original and modified values. + This class is designed to emit differences between two JSON structures in a way that both the original and modified + values are kept, making it suitable for scenarios where tracking both versions of the data is important. + + Example: + Given two JSON structures, `a` and `b`: + + a = {"name": "Alice", "age": 30, "skills": ["Python", "Django"]} + b = {"name": "Alice", "age": 31, "skills": ["Python", "Django", "Flask"]} + + The `emit_dict_diff` method would produce a symmetric diff like: + + { + "age": [30, 31], + "skills": {"insert": [(2, "Flask")]} + } + + This diff maintains both the original and modified values for the age field, and clearly shows the insertion + in the skills list. This format is particularly useful for applications that need to display or process both + versions of the data. + + The `patch` and `unpatch` methods can apply and reverse these diffs, respectively, allowing for flexible + data manipulation. + """ + def emit_set_diff(self, a, b, s, added, removed): + """ + Emits a symmetric representation of the difference between two sets. + + :param a: The original set. + :param b: The modified set. + :param s: Similarity score between the two sets. + :param added: Elements added to the original set. + :param removed: Elements removed from the original set. + :return: A dictionary representing the changes in a symmetric form. + """ if s == 0.0 or len(removed) == len(a): return [a, b] else: @@ -290,6 +512,17 @@ def emit_set_diff(self, a, b, s, added, removed): return d def emit_list_diff(self, a, b, s, inserted, changed, deleted): + """ + Emits a symmetric representation of the difference between two lists. + + :param a: The original list. + :param b: The modified list. 
+ :param s: Similarity score between the two lists. + :param inserted: Elements inserted into the original list. + :param changed: Elements changed in the original list. + :param deleted: Elements deleted from the original list. + :return: A dictionary representing the changes in a symmetric form. + """ if s == 0.0 and not (inserted or changed or deleted): return [a, b] elif s == 1.0 and not (inserted or changed or deleted): @@ -303,6 +536,17 @@ def emit_list_diff(self, a, b, s, inserted, changed, deleted): return d def emit_dict_diff(self, a, b, s, added, changed, removed): + """ + Emits a symmetric representation of the difference between two dictionaries. + + :param a: The original dictionary. + :param b: The modified dictionary. + :param s: Similarity score between the two dictionaries. + :param added: Key-value pairs added to the original dictionary. + :param changed: Key-value pairs changed in the original dictionary. + :param removed: Keys removed from the original dictionary. + :return: A dictionary representing the changes in a symmetric form. + """ if s == 0.0 and not (added or changed or removed): return [a, b] elif s == 1.0 and not (added or changed or removed): @@ -316,12 +560,27 @@ def emit_dict_diff(self, a, b, s, added, changed, removed): return d def emit_value_diff(self, a, b, s): + """ + Emits a symmetric representation of the difference between two values. + + :param a: The original value. + :param b: The modified value. + :param s: Similarity score between the two values. + :return: A list containing the original and modified values. + """ if s == 1.0: return {} else: return [a, b] def patch(self, a, d): + """ + Applies a symmetric diff to a JSON structure to produce the modified structure. + + :param a: The original JSON structure. + :param d: The symmetric diff to apply. + :return: The modified JSON structure after applying the diff. 
+ """ if isinstance(d, list): _, b = d return b @@ -368,6 +627,13 @@ def patch(self, a, d): raise Exception("Invalid symmetric diff") def unpatch(self, b, d): + """ + Reverses a symmetric diff on a JSON structure to produce the original structure. + + :param b: The modified JSON structure. + :param d: The symmetric diff that was applied. + :return: The original JSON structure before the diff was applied. + """ if isinstance(d, list): a, _ = d return a @@ -416,12 +682,44 @@ def unpatch(self, b, d): class RightOnlyJsonDiffSyntax(CompactJsonDiffSyntax): """ + Extends CompactJsonDiffSyntax to focus exclusively on the right (modified) values for lists, + suitable for scenarios where only the latest state matters, ignoring the specific changes that led there. Compare to the CompactJsonDiffSyntax, I will not compare the difference in list, because in some senario we only care about the right value (in most cases means latest value). Instead, I will pop the later list value. + + Example: + Given two JSON structures, `a` and `b`: + + a = {"name": "Alice", "age": 30, "skills": ["Python", "Django"]} + b = {"name": "Alice", "age": 31, "skills": ["Python", "Django", "Flask"]} + + The `emit_dict_diff` method would produce a diff focusing on the updated and added fields: + + { + "age": 31, + "skills": ["Python", "Django", "Flask"] + } + + And the `emit_list_diff` method directly returns the modified list without detailing the individual changes: + + ["Python", "Django", "Flask"] + + This approach simplifies the diff when the path from `a` to `b` is not as relevant as the final state represented by `b`. """ def emit_dict_diff(self, a, b, s, added, changed, removed): + """ + Emits a diff for dictionaries focusing on the final state, combining added and changed fields, and listing removed keys. + + :param a: The original dictionary. + :param b: The modified dictionary. + :param s: Similarity score between the two dictionaries. 
+ :param added: Key-value pairs added to the original dictionary. + :param changed: Key-value pairs changed in the original dictionary. + :param removed: Keys removed from the original dictionary. + :return: A dictionary representing the final state or changes in a compact form. + """ if s == 1.0: return {} else: @@ -431,6 +729,17 @@ def emit_dict_diff(self, a, b, s, added, changed, removed): return changed def emit_list_diff(self, a, b, s, inserted, changed, deleted): + """ + Directly returns the modified list, disregarding the specifics of how it was altered from the original list. + + :param a: The original list. + :param b: The modified list. + :param s: Similarity score between the two lists. + :param inserted: Elements inserted into the original list. + :param changed: Elements changed in the original list. + :param deleted: Elements deleted from the original list. + :return: The modified list as the final state. + """ if s == 0.0: return b elif s == 1.0: @@ -448,12 +757,43 @@ def emit_list_diff(self, a, b, s, inserted, changed, deleted): class JsonDiffer: - + """ + A class for computing differences between two JSON structures and applying patches based on these differences. + + Attributes: + options (Options): Configuration options for the differ. + _symbol_map (dict): A mapping of escaped symbols to their Symbol instances. + + Methods: + diff(a, b, fp=None): Computes the difference between two JSON structures. + similarity(a, b): Calculates the similarity score between two JSON structures. + patch(a, d, fp=None): Applies a diff to a JSON structure to produce the modified structure. + unpatch(b, d, fp=None): Reverses a diff on a JSON structure to produce the original structure. + _unescape(x): Unescapes a string that has been escaped. + unmarshal(d): Converts a marshaled (potentially escaped) structure back to its original form. + _escape(o): Escapes a string or symbol that needs escaping. 
+ marshal(d): Converts a structure to a marshaled (potentially escaped) form. + """ class Options: + """ + A placeholder class for options used by JsonDiffer. Options include syntax, load, dump, marshal, + loader, dumper, and escape_str. + """ pass def __init__(self, syntax='compact', load=False, dump=False, marshal=False, loader=default_loader, dumper=default_dumper, escape_str='$'): + """ + Initializes the JsonDiffer with specified options. + + :param syntax: The syntax to use for diffs. Defaults to 'compact'. + :param load: Whether to automatically load JSON from strings or files. + :param dump: Whether to automatically dump output to JSON strings or files. + :param marshal: Whether to marshal diffs to handle special characters. + :param loader: Custom function for loading JSON data. + :param dumper: Custom function for dumping JSON data. + :param escape_str: String used to escape special characters in keys. + """ self.options = JsonDiffer.Options() self.options.syntax = builtin_syntaxes.get(syntax, syntax) self.options.load = load @@ -468,6 +808,9 @@ def __init__(self, syntax='compact', load=False, dump=False, marshal=False, } def _list_diff_0(self, C, X, Y): + """ + Helper method for computing list differences using dynamic programming. + """ i, j = len(X), len(Y) r = [] while True: @@ -488,6 +831,9 @@ def _list_diff_0(self, C, X, Y): return reversed(r) def _list_diff(self, X, Y): + """ + Computes the difference between two lists. + """ # LCS m = len(X) n = len(Y) @@ -523,6 +869,9 @@ def _list_diff(self, X, Y): return self.options.syntax.emit_list_diff(X, Y, s, inserted, changed, deleted), s def _set_diff(self, a, b): + """ + Computes the difference between two sets. + """ removed = a.difference(b) added = b.difference(a) if not removed and not added: @@ -553,6 +902,9 @@ def _set_diff(self, a, b): return self.options.syntax.emit_set_diff(a, b, s, added, removed), s def _dict_diff(self, a, b): + """ + Computes the difference between two dictionaries. 
+ """ removed = {} nremoved = 0 nadded = 0 @@ -580,6 +932,9 @@ def _dict_diff(self, a, b): return self.options.syntax.emit_dict_diff(a, b, s, added, changed, removed), s def _obj_diff(self, a, b): + """ + Computes the difference between any two JSON-compatible objects. + """ if a is b: return self.options.syntax.emit_value_diff(a, b, 1.0), 1.0 if isinstance(a, dict) and isinstance(b, dict): @@ -596,6 +951,9 @@ def _obj_diff(self, a, b): return self.options.syntax.emit_value_diff(a, b, 1.0), 1.0 def diff(self, a, b, fp=None): + """ + Computes the difference between two JSON structures. + """ if self.options.load: a = self.options.loader(a) b = self.options.loader(b) @@ -611,6 +969,9 @@ def diff(self, a, b, fp=None): return d def similarity(self, a, b): + """ + Calculates the similarity score between two JSON structures. + """ if self.options.load: a = self.options.loader(a) b = self.options.loader(b) @@ -620,6 +981,9 @@ def similarity(self, a, b): return s def patch(self, a, d, fp=None): + """ + Applies a diff to a JSON structure to produce the modified structure. + """ if self.options.load: a = self.options.loader(a) d = self.options.loader(d) @@ -635,6 +999,9 @@ def patch(self, a, d, fp=None): return b def unpatch(self, b, d, fp=None): + """ + Reverses a diff on a JSON structure to produce the original structure. + """ if self.options.load: b = self.options.loader(b) d = self.options.loader(d) @@ -649,8 +1016,10 @@ def unpatch(self, b, d, fp=None): else: return a - def _unescape(self, x): + """ + Unescapes a string that has been escaped. + """ if isinstance(x, string_types): sym = self._symbol_map.get(x, None) if sym is not None: @@ -660,6 +1029,9 @@ def _unescape(self, x): return x def unmarshal(self, d): + """ + Converts a marshaled (potentially escaped) structure back to its original form. 
+ """ if isinstance(d, dict): return { self._unescape(k): self.unmarshal(v) @@ -674,6 +1046,9 @@ def unmarshal(self, d): return self._unescape(d) def _escape(self, o): + """ + Escapes a string or symbol that needs escaping. + """ if type(o) is Symbol: return self.options.escape_str + o.label if isinstance(o, string_types) and o.startswith(self.options.escape_str): @@ -681,6 +1056,9 @@ def _escape(self, o): return o def marshal(self, d): + """ + Converts a structure to a marshaled (potentially escaped) form. + """ if isinstance(d, dict): return { self._escape(k): self.marshal(v) @@ -696,14 +1074,43 @@ def marshal(self, d): def diff(a, b, fp=None, cls=JsonDiffer, **kwargs): + """ + Computes the difference between two JSON structures using a specified JsonDiffer class. + + :param a: The original JSON structure. + :param b: The modified JSON structure. + :param fp: Optional file pointer to dump the diff to. + :param cls: The JsonDiffer class or subclass to use for computing the diff. + :param kwargs: Additional keyword arguments to pass to the JsonDiffer constructor. + :return: The computed diff. + """ return cls(**kwargs).diff(a, b, fp) def patch(a, d, fp=None, cls=JsonDiffer, **kwargs): + """ + Applies a diff to a JSON structure to produce the modified structure using a specified JsonDiffer class. + + :param a: The original JSON structure. + :param d: The diff to apply. + :param fp: Optional file pointer to dump the patched structure to. + :param cls: The JsonDiffer class or subclass to use for applying the diff. + :param kwargs: Additional keyword arguments to pass to the JsonDiffer constructor. + :return: The patched JSON structure. + """ return cls(**kwargs).patch(a, d, fp) def similarity(a, b, cls=JsonDiffer, **kwargs): + """ + Calculates the similarity score between two JSON structures using a specified JsonDiffer class. + + :param a: The first JSON structure. + :param b: The second JSON structure. 
+ :param cls: The JsonDiffer class or subclass to use for calculating similarity. + :param kwargs: Additional keyword arguments to pass to the JsonDiffer constructor. + :return: A similarity score as a float between 0.0 and 1.0. + """ return cls(**kwargs).similarity(a, b) diff --git a/jsondiff/symbols.py b/jsondiff/symbols.py index 7d022b1..48396b5 100644 --- a/jsondiff/symbols.py +++ b/jsondiff/symbols.py @@ -1,4 +1,27 @@ class Symbol: + """ + Symbol Usage Explanation: + + $add: Indicates keys or indices where new elements have been added. + $discard: Indicates elements that have been removed from a set. + $delete: Indicates keys or indices where elements have been deleted. + $insert: Used in lists to specify new elements inserted at specific indices. + $update: Used to indicate that the value of an existing key has changed. + $replace: Used to completely replace the value at a given location. + + These symbols are used within the diff structures returned by methods of JsonDiffer classes to represent different + types of changes between two JSON structures. For example: + + - In a dictionary, $add might be used to show new keys added, $delete to show keys that were removed, and $update + for keys whose values have changed. + - In a list, $insert could indicate new items added at specific positions, and $delete could show items removed + from specific positions. + - The $replace symbol is generally used when an entire section of the JSON (be it a list, dict, or value) + is replaced with another. + + These symbols help in succinctly representing changes in a structured way, making it easier to apply or revert + changes programmatically. + """ def __init__(self, label): self._label = label