» Python创建命令行程序grep » 2. 开发 » 2.3 添加基本功能

添加基本功能

想要达到项目目标,需要实现以下功能:

-c, --count
-i, --ignore-case
-n, --line-number
-r, --recursive
-v, --invert-match

grepy_cli.py:

import argparse

from grepy.grep import grep, grep_recursive, grep_count

def main():
    """Parse the command line, run the search and print the outcome.

    Positional arguments are the pattern and the path to search. The
    optional switches select counting, case-insensitive matching, line
    numbers, recursive traversal and inverted matching.
    """
    cli = argparse.ArgumentParser(description='''A grep-like command-line utility from LiteRank, 
                                     see https://literank.cn/project/13/intro''')
    cli.add_argument('pattern', type=str, help='The pattern to search for')
    cli.add_argument('file_path', type=str, help='The path to the file to search in')

    # Optional flags: every one is a plain boolean switch, so register
    # them from a table instead of repeating the call.
    switches = (
        ('-c', '--count', 'Only a count of selected lines is written to standard output.'),
        ('-i', '--ignore-case', 'Perform case insensitive matching. By default, it is case sensitive.'),
        ('-n', '--line-number', 'Each output line is preceded by its relative line number in the file, starting at line 1. This option is ignored if -c is specified.'),
        ('-r', '--recursive', 'Recursively search subdirectories listed.'),
        ('-v', '--invert-match', 'Selected lines are those not matching any of the specified patterns.'),
    )
    for short_name, long_name, help_text in switches:
        cli.add_argument(short_name, long_name, action='store_true', help=help_text)

    ns = cli.parse_args()

    # Pick the search routine first, then call it once.
    search = grep_recursive if ns.recursive else grep
    found = search(ns.pattern, ns.file_path, get_options(ns))

    if ns.count:
        print(grep_count(found))
        return
    print_result(found, ns.line_number)

def get_options(args):
    """Translate the parsed switches into the option codes used by grep.

    Args:
        args: Parsed arguments exposing ``ignore_case`` and ``invert_match``.

    Returns:
        A list holding ``'i'`` and/or ``'v'`` (in that order) for the
        switches that are enabled; empty when neither is set.
    """
    code_for_switch = (
        ('i', args.ignore_case),
        ('v', args.invert_match),
    )
    return [code for code, enabled in code_for_switch if enabled]

def print_result(result, line_number_option):
    """Print the selected lines, grouped under a per-file header if needed.

    Args:
        result: Mapping of file path to a list of ``(line_number, line)``
            tuples.
        line_number_option: When true, each line is prefixed with its
            line number.

    A ``<path>:`` header (preceded by a blank line) is printed only when
    ``result`` holds more than one file, and only for files that have at
    least one line to show.
    """
    show_headers = len(result) > 1
    last_header = None
    for path, entries in result.items():
        for number, text in entries:
            # Emit the header lazily so files without output stay silent.
            if show_headers and path != last_header:
                last_header = path
                print(f"\n{path}:")
            print(f"{number}: {text}" if line_number_option else text)

# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()

通过 argparse 注册上述所有可选参数。print_result 函数遍历结果字典:当结果涉及多个文件时,先打印 file_path(文件路径);指定 -n 时,在每行前加上 line_number(行号)。

grepy/grep.py:

import re
import os

def _filter_lines(pattern, lines, flag):
    """Pick the lines whose match status equals ``flag``.

    Args:
        pattern: Regex source string or compiled pattern, handed to
            ``re.search``.
        lines: Iterable of raw lines; numbering starts at 1.
        flag: ``True`` keeps matching lines, ``False`` keeps the others.

    Returns:
        List of ``(line_number, stripped_line)`` tuples in input order.
    """
    kept = []
    for number, raw in enumerate(lines, start=1):
        matched = re.search(pattern, raw) is not None
        if matched == flag:
            kept.append((number, raw.strip()))
    return kept

def grep(pattern, file_path, options=None):
    """Search a single file for ``pattern``.

    Args:
        pattern: Regular expression to look for.
        file_path: Path of the file to read in text mode.
        options: Optional collection of option codes: ``'i'`` for
            case-insensitive matching, ``'v'`` to select the lines that
            do NOT match.

    Returns:
        ``{file_path: [(line_number, stripped_line), ...]}``. The list is
        empty when the file cannot be decoded as text.
    """
    with open(file_path, 'r') as handle:
        try:
            content = handle.readlines()
        except UnicodeDecodeError:  # skip binary files
            return {file_path: []}

    # A missing or empty option set behaves like "no switches given".
    active = options or ()
    if 'i' in active:
        pattern = re.compile(pattern, re.IGNORECASE)
    keep_matching = 'v' not in active
    return {file_path: _filter_lines(pattern, content, keep_matching)}

def grep_count(result):
    """Return the total number of selected lines across all files.

    Args:
        result: Mapping of file path to a list of selected lines.

    Returns:
        The sum of the lengths of all lists in ``result`` (0 if empty).
    """
    return sum(map(len, result.values()))

def grep_recursive(pattern, directory_path, options=None):
    """Search every file beneath ``directory_path`` and merge the results.

    Args:
        pattern: Regular expression, forwarded unchanged to ``grep``.
        directory_path: Directory to traverse with ``os.walk``. If it is
            a regular file, that file is searched directly instead of
            silently producing no result.
        options: Optional option codes, forwarded unchanged to ``grep``.

    Returns:
        dict mapping each searched file path to the list ``grep``
        produced for it. Files that cannot be opened are reported on
        stderr and left out of the mapping.
    """
    import sys  # local import: only needed for the warning below

    # os.walk() yields nothing for a regular file, which would make
    # "-r pattern some_file" silently report no matches.
    if os.path.isfile(directory_path):
        return grep(pattern, directory_path, options)

    results = {}

    for root, _, files in os.walk(directory_path):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                results.update(grep(pattern, file_path, options))
            except OSError as err:
                # A broken symlink or an unreadable file must not abort
                # the whole traversal: warn and carry on with the rest.
                print(f"grepy: {file_path}: {err.strerror or err}", file=sys.stderr)

    return results

grep 函数添加了“不区分大小写匹配”和“反向匹配”逻辑。grep_recursive 函数递归地遍历目录下的所有文件,对每个文件执行 grep 操作,并把各文件的结果合并到字典 results 中。

常规用法:

python3 -m grepy_cli result grepy_cli.py

结果:

result = grep_recursive(args.pattern, args.file_path, get_options(args))
result = grep(args.pattern, args.file_path, get_options(args))
print(grep_count(result))
print_result(result, args.line_number)
def print_result(result, line_number_option):
file_count = len(result)
for file_path, lines in result.items():

计数:

python3 -m grepy_cli -c result grepy_cli.py

结果数字:7。

显示行号:

python3 -m grepy_cli -n result grepy_cli.py

结果:

21: result = grep_recursive(args.pattern, args.file_path, get_options(args))
23: result = grep(args.pattern, args.file_path, get_options(args))
26: print(grep_count(result))
28: print_result(result, args.line_number)
38: def print_result(result, line_number_option):
40: file_count = len(result)
41: for file_path, lines in result.items():

使用正则:

python3 -m grepy_cli -n "\br[a-z]+t" grepy_cli.py

结果:

14: parser.add_argument('-n', '--line-number', action='store_true', help='Each output line is preceded by its relative line number in the file, starting at line 1. This option is ignored if -c is specified.')
21: result = grep_recursive(args.pattern, args.file_path, get_options(args))
23: result = grep(args.pattern, args.file_path, get_options(args))
26: print(grep_count(result))
28: print_result(result, args.line_number)
36: return options
38: def print_result(result, line_number_option):
40: file_count = len(result)
41: for file_path, lines in result.items():

反向匹配:

python3 -m grepy_cli -vn result grepy_cli.py

结果:

1: import argparse
2: 
3: from grepy.grep import grep, grep_recursive, grep_count
4: 
5: def main():

...

8: parser.add_argument('pattern', type=str, help='The pattern to search for')
9: parser.add_argument('file_path', type=str, help='The path to the file to search in')

...

50: 
51: if __name__ == '__main__':
52: main()

不区分大小写匹配:

python3 -m grepy_cli -i Only grepy_cli.py

结果:

parser.add_argument('-c', '--count', action='store_true', help='Only a count of selected lines is written to standard output.')

递归匹配:

python3 -m grepy_cli -r count .

结果:

./grepy_cli.py:
from grepy.grep import grep, grep_recursive, grep_count
parser.add_argument('-c', '--count', action='store_true', help='Only a count of selected lines is written to standard output.')
if args.count:
print(grep_count(result))
file_count = len(result)
if file_count > 1 and file_path != current_file:

./grepy/grep.py:
def grep_count(result):
上页下页