python:pathlib

差分

このページの2つのバージョン間の差分を表示します。

この比較画面へのリンク

両方とも前のリビジョン 前のリビジョン
次のリビジョン
前のリビジョン
次のリビジョン 両方とも次のリビジョン
python:pathlib [2020/02/01 20:14] – [参考文献] ともやん python:pathlib [2020/03/26 09:31] ともやん
行 1: 行 1:
-====== pathlib, glob ======+<html> 
 +  <style> 
 +    #mincode pre { 
 +      /*height: 300px;*/ 
 +      overflow: scroll; 
 +      overflow-x: hidden; 
 +      font-size: 10px; 
 +    } 
 +    #mincode_long pre { 
 +      height: 400px; 
 +      overflow: scroll; 
 +      overflow-x: hidden; 
 +      font-size: 10px; 
 +    } 
 +    #mintbl table { 
 +      font-size: 12px; 
 +    } 
 +    .dokuwiki .plugin_wrap table { 
 +      width: auto; 
 +    } 
 +    #result pre { 
 +      /*height: 300px;*/ 
 +      overflow: scroll; 
 +      overflow-x: hidden; 
 +      font-size: 10px; 
 +    } 
 +  </style> 
 +</html> 
 +====== pathlib, glob, os ====== 
 + 
 +===== glob.iglob() の実装 ===== 
 +<WRAP prewrap 100% #mincode_long> 
 +<code python python38/Lib/glob.py> 
 +"""Filename globbing utility.""" 
 + 
 +import os 
 +import re 
 +import fnmatch 
 +import sys 
 + 
 +__all__ = ["glob", "iglob", "escape"] 
 + 
 +def glob(pathname, *, recursive=False): 
 +    """Return a list of paths matching a pathname pattern. 
 + 
 +    The pattern may contain simple shell-style wildcards a la 
 +    fnmatch. However, unlike fnmatch, filenames starting with a 
 +    dot are special cases that are not matched by '*' and '?' 
 +    patterns. 
 + 
 +    If recursive is true, the pattern '**' will match any files and 
 +    zero or more directories and subdirectories. 
 +    """ 
 +    return list(iglob(pathname, recursive=recursive)) 
 + 
 +def iglob(pathname, *, recursive=False): 
 +    """Return an iterator which yields the paths matching a pathname pattern. 
 + 
 +    The pattern may contain simple shell-style wildcards a la 
 +    fnmatch. However, unlike fnmatch, filenames starting with a 
 +    dot are special cases that are not matched by '*' and '?' 
 +    patterns. 
 + 
 +    If recursive is true, the pattern '**' will match any files and 
 +    zero or more directories and subdirectories. 
 +    """ 
 +    it = _iglob(pathname, recursive, False) 
 +    if recursive and _isrecursive(pathname): 
 +        s = next(it)  # skip empty string 
 +        assert not s 
 +    return it 
 + 
 +def _iglob(pathname, recursive, dironly): 
 +    sys.audit("glob.glob", pathname, recursive) 
 +    dirname, basename = os.path.split(pathname) 
 +    if not has_magic(pathname): 
 +        assert not dironly 
 +        if basename: 
 +            if os.path.lexists(pathname): 
 +                yield pathname 
 +        else: 
 +            # Patterns ending with a slash should match only directories 
 +            if os.path.isdir(dirname): 
 +                yield pathname 
 +        return 
 +    if not dirname: 
 +        if recursive and _isrecursive(basename): 
 +            yield from _glob2(dirname, basename, dironly) 
 +        else: 
 +            yield from _glob1(dirname, basename, dironly) 
 +        return 
 +    # `os.path.split()` returns the argument itself as a dirname if it is a 
 +    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path 
 +    # contains magic characters (i.e. r'\\?\C:'). 
 +    if dirname != pathname and has_magic(dirname): 
 +        dirs = _iglob(dirname, recursive, True) 
 +    else: 
 +        dirs = [dirname] 
 +    if has_magic(basename): 
 +        if recursive and _isrecursive(basename): 
 +            glob_in_dir = _glob2 
 +        else: 
 +            glob_in_dir = _glob1 
 +    else: 
 +        glob_in_dir = _glob0 
 +    for dirname in dirs: 
 +        for name in glob_in_dir(dirname, basename, dironly): 
 +            yield os.path.join(dirname, name) 
 + 
 +# These 2 helper functions non-recursively glob inside a literal directory. 
 +# They return a list of basenames.  _glob1 accepts a pattern while _glob0 
 +# takes a literal basename (so it only has to check for its existence). 
 + 
 +def _glob1(dirname, pattern, dironly): 
 +    names = list(_iterdir(dirname, dironly)) 
 +    if not _ishidden(pattern): 
 +        names = (x for x in names if not _ishidden(x)) 
 +    return fnmatch.filter(names, pattern) 
 + 
 +def _glob0(dirname, basename, dironly): 
 +    if not basename: 
 +        # `os.path.split()` returns an empty basename for paths ending with a 
 +        # directory separator.  'q*x/' should match only directories. 
 +        if os.path.isdir(dirname): 
 +            return [basename] 
 +    else: 
 +        if os.path.lexists(os.path.join(dirname, basename)): 
 +            return [basename] 
 +    return [] 
 + 
 +# Following functions are not public but can be used by third-party code. 
 + 
 +def glob0(dirname, pattern): 
 +    return _glob0(dirname, pattern, False) 
 + 
 +def glob1(dirname, pattern): 
 +    return _glob1(dirname, pattern, False) 
 + 
 +# This helper function recursively yields relative pathnames inside a literal 
 +# directory. 
 + 
 +def _glob2(dirname, pattern, dironly): 
 +    assert _isrecursive(pattern) 
 +    yield pattern[:0] 
 +    yield from _rlistdir(dirname, dironly) 
 + 
 +# If dironly is false, yields all file names inside a directory. 
 +# If dironly is true, yields only directory names. 
 +def _iterdir(dirname, dironly): 
 +    if not dirname: 
 +        if isinstance(dirname, bytes): 
 +            dirname = bytes(os.curdir, 'ASCII') 
 +        else: 
 +            dirname = os.curdir 
 +    try: 
 +        with os.scandir(dirname) as it: 
 +            for entry in it: 
 +                try: 
 +                    if not dironly or entry.is_dir(): 
 +                        yield entry.name 
 +                except OSError: 
 +                    pass 
 +    except OSError: 
 +        return 
 + 
 +# Recursively yields relative pathnames inside a literal directory. 
 +def _rlistdir(dirname, dironly): 
 +    names = list(_iterdir(dirname, dironly)) 
 +    for x in names: 
 +        if not _ishidden(x): 
 +            yield x 
 +            path = os.path.join(dirname, x) if dirname else x 
 +            for y in _rlistdir(path, dironly): 
 +                yield os.path.join(x, y) 
 + 
 + 
 +magic_check = re.compile('([*?[])') 
 +magic_check_bytes = re.compile(b'([*?[])') 
 + 
 +def has_magic(s): 
 +    if isinstance(s, bytes): 
 +        match = magic_check_bytes.search(s) 
 +    else: 
 +        match = magic_check.search(s) 
 +    return match is not None 
 + 
 +def _ishidden(path): 
 +    return path[0] in ('.', b'.'[0]) 
 + 
 +def _isrecursive(pattern): 
 +    if isinstance(pattern, bytes): 
 +        return pattern == b'**' 
 +    else: 
 +        return pattern == '**' 
 + 
 +def escape(pathname): 
 +    """Escape all special characters. 
 +    """ 
 +    # Escaping is done by wrapping any of "*?[" between square brackets. 
 +    # Metacharacters do not work in the drive part and shouldn't be escaped. 
 +    drive, pathname = os.path.splitdrive(pathname) 
 +    if isinstance(pathname, bytes): 
 +        pathname = magic_check_bytes.sub(br'[\1]', pathname) 
 +    else: 
 +        pathname = magic_check.sub(r'[\1]', pathname) 
 +    return drive + pathname 
 +</code> 
 +</WRAP> 
 + 
 +===== パフォーマンス比較 ===== 
 +<WRAP prewrap 100% #mincode> 
 +<code python dir_search.py> 
 +#!/usr/bin/env python3 
 +# -*- coding: utf-8 -*- 
 +import os 
 +import glob 
 +import timeit 
 + 
 +def walk(): 
 +    #result = [[file for file in files if file.endswith('.py')] for path, dirs, files in os.walk('.')] 
 +    result = [] 
 +    for path, dirs, files in os.walk('.'): 
 +        for file in files: 
 +            if file.endswith('.py'): 
 +                result.append(file) 
 +    return result 
 + 
 +def iglob(): 
 +    result = [] 
 +    for file in glob.iglob('**/*', recursive=True): 
 +        if file.endswith('.py'): 
 +            result.append(file) 
 +    return result 
 + 
 +def iglob2(): 
 +    result = [] 
 +    for file in glob.iglob('**/*.py', recursive=True): 
 +        result.append(file) 
 +    return result 
 + 
 +def main(): 
 +    # os.chdir('./') 
 +    num_of_exec = 10 
 +    print(timeit.timeit(walk, number=num_of_exec) / num_of_exec) 
 +    print(timeit.timeit(iglob, number=num_of_exec) / num_of_exec) 
 +    print(timeit.timeit(iglob2, number=num_of_exec) / num_of_exec) 
 + 
 +if __name__ == '__main__': 
 +    main() 
 +</code> 
 +</WRAP> 
 +<WRAP prewrap 100% #result> 
 +<code> 
 +$ ./dir_search.py 
 +1.3243966199999704 
 +1.4991581099999167 
 +1.61260242999997 
 +$ time find . -name '*.py' > /dev/null 
 + 
 +real    0m0.523s 
 +user    0m0.020s 
 +sys     0m0.077s 
 +</code> 
 +</WRAP>
  
 ===== 参考文献 ===== ===== 参考文献 =====
 [[https://docs.python.org/ja/3/library/glob.html|glob --- Unix 形式のパス名のパターン展開 — Python 3.8.1 ドキュメント]]\\ [[https://docs.python.org/ja/3/library/glob.html|glob --- Unix 形式のパス名のパターン展開 — Python 3.8.1 ドキュメント]]\\
 [[https://stackoverflow.com/questions/20638040/glob-exclude-pattern|python - glob exclude pattern - Stack Overflow]]\\ [[https://stackoverflow.com/questions/20638040/glob-exclude-pattern|python - glob exclude pattern - Stack Overflow]]\\
-[[https://stackoverflow.com/questions/20638040/glob-exclude-pattern/36295481|python - glob exclude pattern - Stack Overflow]]\\ 
 [[https://github.com/python/cpython/blob/master/Lib/pathlib.py|cpython/pathlib.py at master · python/cpython]]\\ [[https://github.com/python/cpython/blob/master/Lib/pathlib.py|cpython/pathlib.py at master · python/cpython]]\\
 +[[https://stackoverflow.com/questions/50948391/whats-the-fastest-way-to-recursively-search-for-files-in-python/50950952|What's the fastest way to recursively search for files in python? - Stack Overflow]]\\
 +[[https://qiita.com/amowwee/items/e63b3610ea750f7dba1b|Pythonでフォルダ内のファイルリストを取得する - Qiita]]\\
  
  • python/pathlib.txt
  • 最終更新: 2023/05/27 09:00
  • by ともやん