liberty_parser/quicklogic_timings_importer/lib_diff.py - skywater-pdk-audit - Git at Google

 import difflib
 import argparse
 from pathlib import Path
 import re
 import log_printer


 def junk_characters(a):
     if a in ' \t\"':
         return True
     else:
         return False


 def clean_lines(
         lines,
         remove_comments=True,
         move_entry_to_newline=True,
         remove_quotes=True,
         remove_whitespaces=True,
         unify_numbers=True,
         remove_line_breaks=True):
     '''Performs file preprocessing before running diff.

     It is used for removing all semantically irrelevant characters that may
     blur the real differences between two files. By default it converts all
     tabs to single whitespace.

     Parameters
     ----------
     lines: list
         list of Liberty lines to clean
     remove_comments: bool
         removes comments if True
     move_entry_to_newline: bool
         if True and when there is content present after the closing brace, it
         is moved to newline
     remove_quotes: bool
         remove all quotes characters from file if True
     remove_whitespaces: bool
         remove all whitespaces if True
     unify_numbers: bool
         unifies the number notation if True
     remove_line_breaks:
         when the line is broken into multiple lines with backslash, convert it
         to single line if True

     Returns
     -------
     list: cleaned lines
     '''
     # join all lines into single string
     fullfile = '\n'.join(lines)

     if remove_comments:
         # remove comments (C/C++ style)
         fullfile = re.sub(r'(?:\/\*(.*?)\*\/)|(?:\/\/(.*?))', '',
                           fullfile, flags=re.DOTALL)

         # remove comments (Python style)
         fullfile = re.sub(r'#[^\n]*\n', '', fullfile, flags=re.DOTALL)

     # replace all tabs with single space
     fullfile = fullfile.replace('\t', ' ')

     if move_entry_to_newline:
         # move non-whitespace content after } to new line
         fullfile = re.sub(r'}\s*(?!\n)', '}\n', fullfile, flags=re.DOTALL)

     if remove_quotes:
         # remove quotes
         fullfile = fullfile.replace('"', '')

     if remove_whitespaces:
         # remove whitespaces
         fullfile = fullfile.replace(' ', '')

     if remove_line_breaks:
         fullfile = re.sub(r'\\\s*\n', '', fullfile, flags=re.DOTALL)

     # split single string into lines
     lines = fullfile.split('\n')
     fullfile = ''

     if remove_whitespaces:
         # remove empty lines and trailing whitespaces
         lines = [line.rstrip() for line in lines if line.strip()]

     if unify_numbers:
         floats = re.compile(
                 r'(?P<number>[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)')
         for i in range(len(lines)):
             lines[i] = floats.sub(
                     lambda m: str(float(m.group('number'))),
                     lines[i])

     # add newlines at the end of each line
     lines = [line + '\n' for line in lines]

     return lines


 def diff_files(
         in1,
         in2,
         print_diff=True,
         html_path=None,
         html_row_width=100,
         return_similarity=False,
         similarity_method='quick'):
     '''Compares two cleaned Liberty files and returns desired representations.

     Parameters
     ----------
     in1: list
         List of cleaned lines for first Liberty file
     in2: list
         List of cleaned lines for second Liberty file
     print_diff: bool
         If True, function will print Differ-like diff to standard output
     html_path: str
         If None, does nothing. Otherwise, it generates HTML file containing
         side-by-side comparison of cleaned Liberty files in the path
         specified in the parameter
     html_row_width: int
         Number of characters within single row of one side of the HTML
         comparison
     return_similarity: bool
         If True, function will return the value from 0 to 1 representing the
         similarity between files, where 0 means they are completely different,
         and 1 means they are exactly the same
     similarity_method: str
         Method used for computing similarity measure. Values can be:
         * normal - slow, exact comparison,
         * quick - faster, less exact comparison that returns upper bound for
             normal similarity ratio
         * real_quick - very fast, very inaccurate

     Returns
     -------
     float: similarity measure result if `return_similarity` is True,
         otherwise None
     '''
     if html_path:
         diff = difflib.HtmlDiff(
                 charjunk=junk_characters,
                 tabsize=4,
                 wrapcolumn=html_row_width)
         with open(html_path, 'w') as outfile:
             result = diff.make_file(in1, in2, context=True)
             outfile.write(result)
     if print_diff:
         diff = difflib.ndiff(
                 in1,
                 in2,
                 charjunk=junk_characters)
         print(''.join(diff))
     if return_similarity:
         seqmatcher = difflib.SequenceMatcher(None, ''.join(in1), ''.join(in2))
         if similarity_method == 'normal':
             return seqmatcher.ratio()
         elif similarity_method == 'quick':
             return seqmatcher.quick_ratio()
         elif similarity_method == 'real_quick':
             return seqmatcher.real_quick_ratio()


 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument(
             "input1",
             help="First Liberty file",
             type=Path)
     parser.add_argument(
             "input2",
             help="Second Liberty file",
             type=Path)
     parser.add_argument(
             "--output-html",
             help="Generate side-by-side diff to HTML file with given path",
             type=Path)
     parser.add_argument(
             "--html-row-width",
             help="Row width of the HTML side-by-side column",
             default=100,
             type=int)
     parser.add_argument(
             "--not-remove-comments",
             help="Do not remove comments before comparing files",
             action="store_true")
     parser.add_argument(
             "--not-move-entry-to-newline",
             help="Do not move content after closing bracket '}' to newline",
             action="store_true")
     parser.add_argument(
             "--not-remove-quotes",
             help="Do not remove quotes '\"' from documents before comparison",
             action="store_true")
     parser.add_argument(
             "--not-remove-whitespaces",
             help="Do not remove comments before comparing files",
             action="store_true")
     parser.add_argument(
             "--not-remove-line-breaks",
             help=r"Do not remove line breaks '\\n'",
             action="store_true")
     parser.add_argument(
             "--not-unify-numbers",
             help="Do not convert numbers in both files to unified form",
             action="store_true")
     parser.add_argument(
             "--print-diff",
             help="Print the diff to stdout",
             action="store_true")
     parser.add_argument(
             "--compute-similarity",
             help="Computes and prints the similarity between files (0-1)",
             action="store_true")
     parser.add_argument(
             "--similarity-method",
             help="The method used for computing similarity",
             type=str,
             default='quick',
             choices=['normal', 'quick', 'real_quick'])
     parser.add_argument(
             "--log-suppress-below",
             help="The mininal not suppressed log level",
             type=str,
             default="ERROR",
             choices=log_printer.LOGLEVELS)

     args = parser.parse_args()

     log_printer.SUPPRESSBELOW = args.log_suppress_below

     with open(args.input1, 'r') as input1:
         in1 = input1.readlines()
     with open(args.input2, 'r') as input2:
         in2 = input2.readlines()

     in1 = clean_lines(
             in1,
             not args.not_remove_comments,
             not args.not_move_entry_to_newline,
             not args.not_remove_quotes,
             not args.not_remove_whitespaces,
             not args.not_unify_numbers,
             not args.not_remove_line_breaks)

     in2 = clean_lines(
             in2,
             not args.not_remove_comments,
             not args.not_move_entry_to_newline,
             not args.not_remove_quotes,
             not args.not_remove_whitespaces,
             not args.not_unify_numbers,
             not args.not_remove_line_breaks)

     similarity = diff_files(
             in1,
             in2,
             args.print_diff,
             args.output_html,
             args.html_row_width,
             args.compute_similarity,
             args.similarity_method)
     if args.compute_similarity:
         print('Similarity between documents:  {}'.format(similarity))
	import difflib
	import argparse
	from pathlib import Path
	import re
	import log_printer


	def junk_characters(a):
	if a in ' \t\"':
	return True
	else:
	return False


	def clean_lines(
	lines,
	remove_comments=True,
	move_entry_to_newline=True,
	remove_quotes=True,
	remove_whitespaces=True,
	unify_numbers=True,
	remove_line_breaks=True):
	'''Performs file preprocessing before running diff.

	It is used for removing all semantically irrelevant characters that may
	blur the real differences between two files. By default it converts all
	tabs to single whitespace.

	Parameters
	----------
	lines: list
	list of Liberty lines to clean
	remove_comments: bool
	removes comments if True
	move_entry_to_newline: bool
	if True and when there is content present after the closing brace, it
	is moved to newline
	remove_quotes: bool
	remove all quotes characters from file if True
	remove_whitespaces: bool
	remove all whitespaces if True
	unify_numbers: bool
	unifies the number notation if True
	remove_line_breaks:
	when the line is broken into multiple lines with backslash, convert it
	to single line if True

	Returns
	-------
	list: cleaned lines
	'''
	# join all lines into single string
	fullfile = '\n'.join(lines)

	if remove_comments:
	# remove comments (C/C++ style)
	fullfile = re.sub(r'(?:\/\(.?)\\/)\|(?:\/\/(.?))', '',
	fullfile, flags=re.DOTALL)

	# remove comments (Python style)
	fullfile = re.sub(r'#[^\n]*\n', '', fullfile, flags=re.DOTALL)

	# replace all tabs with single space
	fullfile = fullfile.replace('\t', ' ')

	if move_entry_to_newline:
	# move non-whitespace content after } to new line
	fullfile = re.sub(r'}\s*(?!\n)', '}\n', fullfile, flags=re.DOTALL)

	if remove_quotes:
	# remove quotes
	fullfile = fullfile.replace('"', '')

	if remove_whitespaces:
	# remove whitespaces
	fullfile = fullfile.replace(' ', '')

	if remove_line_breaks:
	fullfile = re.sub(r'\\\s*\n', '', fullfile, flags=re.DOTALL)

	# split single string into lines
	lines = fullfile.split('\n')
	fullfile = ''

	if remove_whitespaces:
	# remove empty lines and trailing whitespaces
	lines = [line.rstrip() for line in lines if line.strip()]

	if unify_numbers:
	floats = re.compile(
	r'(?P<number>[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)')
	for i in range(len(lines)):
	lines[i] = floats.sub(
	lambda m: str(float(m.group('number'))),
	lines[i])

	# add newlines at the end of each line
	lines = [line + '\n' for line in lines]

	return lines


	def diff_files(
	in1,
	in2,
	print_diff=True,
	html_path=None,
	html_row_width=100,
	return_similarity=False,
	similarity_method='quick'):
	'''Compares two cleaned Liberty files and returns desired representations.

	Parameters
	----------
	in1: list
	List of cleaned lines for first Liberty file
	in2: list
	List of cleaned lines for second Liberty file
	print_diff: bool
	If True, function will print Differ-like diff to standard output
	html_path: str
	If None, does nothing. Otherwise, it generates HTML file containing
	side-by-side comparison of cleaned Liberty files in the path
	specified in the parameter
	html_row_width: int
	Number of characters within single row of one side of the HTML
	comparison
	return_similarity: bool
	If True, function will return the value from 0 to 1 representing the
	similarity between files, where 0 means they are completely different,
	and 1 means they are exactly the same
	similarity_method: str
	Method used for computing similarity measure. Values can be:
	* normal - slow, exact comparison,
	* quick - faster, less exact comparison that returns upper bound for
	normal similarity ratio
	* real_quick - very fast, very inaccurate

	Returns
	-------
	float: similarity measure result if `return_similarity` is True,
	otherwise None
	'''
	if html_path:
	diff = difflib.HtmlDiff(
	charjunk=junk_characters,
	tabsize=4,
	wrapcolumn=html_row_width)
	with open(html_path, 'w') as outfile:
	result = diff.make_file(in1, in2, context=True)
	outfile.write(result)
	if print_diff:
	diff = difflib.ndiff(
	in1,
	in2,
	charjunk=junk_characters)
	print(''.join(diff))
	if return_similarity:
	seqmatcher = difflib.SequenceMatcher(None, ''.join(in1), ''.join(in2))
	if similarity_method == 'normal':
	return seqmatcher.ratio()
	elif similarity_method == 'quick':
	return seqmatcher.quick_ratio()
	elif similarity_method == 'real_quick':
	return seqmatcher.real_quick_ratio()


	if __name__ == '__main__':
	parser = argparse.ArgumentParser()
	parser.add_argument(
	"input1",
	help="First Liberty file",
	type=Path)
	parser.add_argument(
	"input2",
	help="Second Liberty file",
	type=Path)
	parser.add_argument(
	"--output-html",
	help="Generate side-by-side diff to HTML file with given path",
	type=Path)
	parser.add_argument(
	"--html-row-width",
	help="Row width of the HTML side-by-side column",
	default=100,
	type=int)
	parser.add_argument(
	"--not-remove-comments",
	help="Do not remove comments before comparing files",
	action="store_true")
	parser.add_argument(
	"--not-move-entry-to-newline",
	help="Do not move content after closing bracket '}' to newline",
	action="store_true")
	parser.add_argument(
	"--not-remove-quotes",
	help="Do not remove quotes '\"' from documents before comparison",
	action="store_true")
	parser.add_argument(
	"--not-remove-whitespaces",
	help="Do not remove comments before comparing files",
	action="store_true")
	parser.add_argument(
	"--not-remove-line-breaks",
	help=r"Do not remove line breaks '\\n'",
	action="store_true")
	parser.add_argument(
	"--not-unify-numbers",
	help="Do not convert numbers in both files to unified form",
	action="store_true")
	parser.add_argument(
	"--print-diff",
	help="Print the diff to stdout",
	action="store_true")
	parser.add_argument(
	"--compute-similarity",
	help="Computes and prints the similarity between files (0-1)",
	action="store_true")
	parser.add_argument(
	"--similarity-method",
	help="The method used for computing similarity",
	type=str,
	default='quick',
	choices=['normal', 'quick', 'real_quick'])
	parser.add_argument(
	"--log-suppress-below",
	help="The mininal not suppressed log level",
	type=str,
	default="ERROR",
	choices=log_printer.LOGLEVELS)

	args = parser.parse_args()

	log_printer.SUPPRESSBELOW = args.log_suppress_below

	with open(args.input1, 'r') as input1:
	in1 = input1.readlines()
	with open(args.input2, 'r') as input2:
	in2 = input2.readlines()

	in1 = clean_lines(
	in1,
	not args.not_remove_comments,
	not args.not_move_entry_to_newline,
	not args.not_remove_quotes,
	not args.not_remove_whitespaces,
	not args.not_unify_numbers,
	not args.not_remove_line_breaks)

	in2 = clean_lines(
	in2,
	not args.not_remove_comments,
	not args.not_move_entry_to_newline,
	not args.not_remove_quotes,
	not args.not_remove_whitespaces,
	not args.not_unify_numbers,
	not args.not_remove_line_breaks)

	similarity = diff_files(
	in1,
	in2,
	args.print_diff,
	args.output_html,
	args.html_row_width,
	args.compute_similarity,
	args.similarity_method)
	if args.compute_similarity:
	print('Similarity between documents: {}'.format(similarity))