Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: pymupdf/PyMuPDF
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: main
Choose a base ref
...
head repository: pymupdf/PyMuPDF
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: jules2
Choose a head ref
  • 6 commits
  • 8 files changed
  • 1 contributor

Commits on May 20, 2024

  1. pipcl.py: minor changes to diagnostics.

    Also removed trailing white space.
    julian-smith-artifex-com committed May 20, 2024
    Copy the full SHA
    3563a03 View commit details
  2. setup.py: reduced diagnostics.

    Also put generated files in src/build/, so that they are not removed by `git
    clean` unless `-d` is specified.
    julian-smith-artifex-com committed May 20, 2024
    Copy the full SHA
    7c2a475 View commit details
  3. src/__init__.py: patch one extra line to use _format_g().

    This patches up one place that was missed in earlier commit.
    julian-smith-artifex-com committed May 20, 2024
    Copy the full SHA
    1296fba View commit details
  4. src/__init__.py tests/conftest.py: check no calls to log() when runni…

    …ng tests.
    
    Also changed exception_info() to call log() instead of writing directly to
    _g_out_log, so that exception backtraces are also checked.
    julian-smith-artifex-com committed May 20, 2024
    Copy the full SHA
    35332d0 View commit details
  5. src/utils.py: disable various expected calls to pymupdf.exception_inf…

    …o().
    
    Addresses #3479 and #3488.
    julian-smith-artifex-com committed May 20, 2024
    Copy the full SHA
    7989919 View commit details
  6. Add pymupdf.get_text() - optionally concurrent call of Page.get_text(…

    …) on some/all pages.
    
    setup.py
        Add new _get_text.py to wheels/installs.
    src/__init__.py
        New top-level get_text() fn, calls _get_text.get(text).
    src/_get_text.py
        New, contains implementation of get_text().
    tests/test_pylint.py
        Avoid pylint failure by disabling `R0801: Similar lines in 2 files`.
    tests/test_textextract.py
        Test get_text() and show timings.
    
    Timings for MacOS-arm64 and PDF spec:
        method='multiprocessing' : 3.3x.
        method='fork': 3.6x.
    julian-smith-artifex-com committed May 20, 2024
    Copy the full SHA
    081ca67 View commit details
Showing with 323 additions and 58 deletions.
  1. +32 −29 pipcl.py
  2. +11 −15 setup.py
  3. +82 −3 src/__init__.py
  4. +144 −0 src/_get_text.py
  5. +9 −10 src/utils.py
  6. +4 −0 tests/conftest.py
  7. +4 −1 tests/test_pylint.py
  8. +37 −0 tests/test_textextract.py
61 changes: 32 additions & 29 deletions pipcl.py
Original file line number Diff line number Diff line change
@@ -237,21 +237,21 @@ class Package:
>>> assert len(so) == 1
>>> so = so[0]
>>> assert os.path.getmtime(so) > t0
Check `entry_points` causes creation of command `foo_cli` when we install
from our wheel using pip. [As of 2024-02-24 using pipcl's CLI interface
directly with `setup.py install` does not support entry points.]
>>> print('Creating venv.', file=sys.stderr)
>>> _ = subprocess.run(
... f'cd pipcl_test && {sys.executable} -m venv pylocal',
... shell=1, check=1)
>>> print('Installing from wheel into venv using pip.', file=sys.stderr)
>>> _ = subprocess.run(
... f'. pipcl_test/pylocal/bin/activate && pip install pipcl_test/dist/*.whl',
... shell=1, check=1)
>>> print('Running foo_cli.', file=sys.stderr)
>>> _ = subprocess.run(
... f'. pipcl_test/pylocal/bin/activate && foo_cli',
@@ -299,7 +299,7 @@ def __init__(self,
requires_external = None,
project_url = None,
provides_extra = None,

entry_points = None,

root = None,
@@ -374,21 +374,21 @@ def __init__(self,
entry_points:
String or dict specifying *.dist-info/entry_points.txt, for
example:
```
[console_scripts]
foo_cli = foo.__main__:main
```
or:
{ 'console_scripts': [
'foo_cli = foo.__main__:main',
],
}
See: https://packaging.python.org/en/latest/specifications/entry-points/
root:
Root of package, defaults to current directory.
@@ -684,7 +684,7 @@ def add_str(content, to_):
# Add <name>-<version>.dist-info/COPYING.
if self.license:
add_str(self.license, f'{dist_info_dir}/COPYING')

# Add <name>-<version>.dist-info/entry_points.txt.
entry_points_text = self._entry_points_text()
if entry_points_text:
@@ -735,15 +735,15 @@ def build_sdist(self,
os.makedirs(sdist_directory, exist_ok=True)
tarpath = f'{sdist_directory}/{prefix}.tar.gz'
log2(f'Creating sdist: {tarpath}')

with tarfile.open(tarpath, 'w:gz') as tar:

names_in_tar = list()
def check_name(name):
if name in names_in_tar:
raise Exception(f'Name specified twice: {name}')
names_in_tar.append(name)

def add(from_, name):
check_name(name)
if isinstance(from_, str):
@@ -757,7 +757,7 @@ def add(from_, name):
tar.addfile(ti, io.BytesIO(from_))
else:
assert 0

def add_string(text, name):
textb = text.encode('utf8')
return add(textb, name)
@@ -776,7 +776,7 @@ def add_string(text, name):
if to_rel == 'pyproject.toml':
found_pyproject_toml = True
add(from_, to_rel)

if not found_pyproject_toml:
log0(f'Warning: no pyproject.toml specified.')

@@ -884,10 +884,10 @@ def add_str(content, to_abs, to_rel):
add_file( from_, to_abs2, to_rel)

add_str( self._metainfo(), f'{root2}/{dist_info_dir}/METADATA', f'{dist_info_dir}/METADATA')

if self.license:
add_str( self.license, f'{root2}/{dist_info_dir}/COPYING', f'{dist_info_dir}/COPYING')

entry_points_text = self._entry_points_text()
if entry_points_text:
add_str(
@@ -1284,7 +1284,7 @@ def _fromto(self, p):
if isinstance(p, str):
p = p, p
assert isinstance(p, tuple) and len(p) == 2

from_, to_ = p
assert isinstance(from_, (str, bytes))
assert isinstance(to_, str)
@@ -1798,7 +1798,7 @@ def git_items( directory, submodules=False):
return ret


def run( command, capture=False, check=1):
def run( command, capture=False, check=1, verbose=1):
'''
Runs a command using `subprocess.run()`.
@@ -1818,6 +1818,8 @@ def run( command, capture=False, check=1):
check:
If true we raise an exception on error; otherwise we include the
command's returncode in our return value.
verbose:
If true we show the command.
Returns:
check capture Return
--------------------------
@@ -1828,7 +1830,8 @@ def run( command, capture=False, check=1):
'''
lines = _command_lines( command)
nl = '\n'
log2( f'Running: {nl.join(lines)}')
if verbose:
log1( f'Running: {nl.join(lines)}')
sep = ' ' if windows() else '\\\n'
command2 = sep.join( lines)
cp = subprocess.run(
@@ -1924,18 +1927,18 @@ def __init__(self):
stderr=subprocess.DEVNULL,
check=0,
).returncode
log1(f'{e=} from {pc!r}.')
log2(f'{e=} from {pc!r}.')
if e == 0:
python_config = pc
assert python_config, f'Cannot find python-config'
else:
python_config = f'{python_exe}-config'
log1(f'Using {python_config=}.')
log2(f'Using {python_config=}.')
try:
self.includes = run( f'{python_config} --includes', capture=1).strip()
self.includes = run( f'{python_config} --includes', capture=1, verbose=0).strip()
except Exception as e:
raise Exception('We require python development tools to be installed.') from e
self.ldflags = run( f'{python_config} --ldflags', capture=1).strip()
self.ldflags = run( f'{python_config} --ldflags', capture=1, verbose=0).strip()
if linux():
# It seems that with python-3.10 on Linux, we can get an
# incorrect -lcrypt flag that on some systems (e.g. WSL)
@@ -2100,7 +2103,7 @@ def run_if( command, out, *prerequisites):
if not doit:
out_mtime = _fs_mtime( out)
if out_mtime == 0:
doit = 'File does not exist: {out!e}'
doit = f'File does not exist: {out!r}'

cmd_path = f'{out}.cmd'
if os.path.isfile( cmd_path):
@@ -2155,7 +2158,7 @@ def _make_prerequisites(p):
os.remove( cmd_path)
except Exception:
pass
log2( f'Running command because: {doit}')
log1( f'Running command because: {doit}')

run( command)

@@ -2164,7 +2167,7 @@ def _make_prerequisites(p):
f.write( command)
return True
else:
log2( f'Not running command because up to date: {out!r}')
log1( f'Not running command because up to date: {out!r}')

if 0:
log2( f'out_mtime={time.ctime(out_mtime)} pre_mtime={time.ctime(pre_mtime)}.'
@@ -2361,7 +2364,7 @@ def add_content(self, content, to_, verbose=True):
log2(f'Adding {to_}')

def add_file(self, from_, to_):
log2(f'Adding file: {os.path.relpath(from_)} => {to_}')
log1(f'Adding file: {os.path.relpath(from_)} => {to_}')
with open(from_, 'rb') as f:
content = f.read()
self.add_content(content, to_, verbose=False)
26 changes: 11 additions & 15 deletions setup.py
Original file line number Diff line number Diff line change
@@ -571,13 +571,6 @@ def build():
build_type,
)

for d in (
mupdf_build_dir,
f'{g_root}/src',
):
if d:
run(f'ls -l {os.path.relpath(d)}', check=0)

# Generate lists of (from, to) items to return to pipcl. We put MuPDF
# shared libraries in a separate list so that we can build specific wheels
# as determined by g_flavour.
@@ -613,21 +606,26 @@ def add( ret, from_, to_):

if path_so_leaf_b:
# Add rebased implementation files.
add( ret_p, f'{g_root}/src/fitz___init__.py', 'fitz/__init__.py') # For `fitz` module alias.
add( ret_p, f'{g_root}/src/fitz_table.py', 'fitz/table.py') # For `fitz` module alias.
add( ret_p, f'{g_root}/src/fitz_utils.py', 'fitz/utils.py') # For `fitz` module alias.
to_dir = 'pymupdf/'
add( ret_p, f'{g_root}/src/__init__.py', to_dir)
add( ret_p, f'{g_root}/src/__main__.py', to_dir)
add( ret_p, f'{g_root}/src/pymupdf.py', to_dir)
add( ret_p, f'{g_root}/src/table.py', to_dir)
add( ret_p, f'{g_root}/src/utils.py', to_dir)
add( ret_p, f'{g_root}/src/extra.py', to_dir)
add( ret_p, f'{g_root}/src/{path_so_leaf_b}', to_dir)
add( ret_p, f'{g_root}/src/_get_text.py', to_dir)
add( ret_p, f'{g_root}/src/build/extra.py', to_dir)
add( ret_p, f'{g_root}/src/build/{path_so_leaf_b}', to_dir)

# Add support for `fitz` backwards compatibility.
add( ret_p, f'{g_root}/src/fitz___init__.py', 'fitz/__init__.py')
add( ret_p, f'{g_root}/src/fitz_table.py', 'fitz/table.py')
add( ret_p, f'{g_root}/src/fitz_utils.py', 'fitz/utils.py')

if mupdf_local:
# Add MuPDF Python API.
add( ret_p, f'{mupdf_build_dir}/mupdf.py', to_dir)

# Add MuPDF shared libraries.
if windows:
wp = pipcl.wdev.WindowsPython()
add( ret_p, f'{mupdf_build_dir}/_mupdf.pyd', to_dir)
@@ -659,8 +657,6 @@ def add( ret, from_, to_):
else:
add( ret, f'{g_root}/README.md', '$dist-info/README.md')

for f, t in ret:
log( f'build(): {f} => {t}')
return ret


@@ -963,7 +959,7 @@ def _build_extension_rebased( mupdf_local, mupdf_build_dir, build_type):
path_so_leaf_b = pipcl.build_extension(
name = 'extra',
path_i = f'{g_root}/src/extra.i',
outdir = f'{g_root}/src',
outdir = f'{g_root}/src/build',
includes = includes,
defines = defines,
libpaths = libpaths,
Loading