V6 - with metadata connector
Write that metadata :-D
This commit is contained in:
parent
39e61b7f68
commit
4c18bd2cf9
1 changed file with 25 additions and 0 deletions
|
@@ -12,6 +12,8 @@ import sys
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
import logging
|
import logging
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from metadata_writer import PDFMetadataWriter
|
||||||
|
|
||||||
|
|
||||||
class PDFProcessor:
|
class PDFProcessor:
|
||||||
def __init__(self, input_dir: str, output_dir: str, api_key: str, logger: logging.Logger = None):
|
def __init__(self, input_dir: str, output_dir: str, api_key: str, logger: logging.Logger = None):
|
||||||
|
@@ -350,6 +352,15 @@ Examples:
|
||||||
parser.add_argument('--debug',
|
parser.add_argument('--debug',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
help='Enable debug logging')
|
help='Enable debug logging')
|
||||||
|
|
||||||
|
parser.add_argument('--write-metadata',
|
||||||
|
action='store_true',
|
||||||
|
help='Write extracted metadata back to PDF files')
|
||||||
|
|
||||||
|
parser.add_argument('--no-backup',
|
||||||
|
action='store_true',
|
||||||
|
help='Skip creating backups when writing metadata')
|
||||||
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
@@ -390,6 +401,20 @@ Examples:
|
||||||
|
|
||||||
# Process files
|
# Process files
|
||||||
results = processor.process_pdfs()
|
results = processor.process_pdfs()
|
||||||
|
|
||||||
|
if args.write_metadata:
|
||||||
|
logger.info("Writing metadata back to PDF files...")
|
||||||
|
writer = PDFMetadataWriter(logger)
|
||||||
|
stats = writer.batch_write_metadata(results, backup=not args.no_backup)
|
||||||
|
|
||||||
|
logger.info("\nMetadata Writing Results:")
|
||||||
|
logger.info(f"Successfully updated: {stats['success_count']} files")
|
||||||
|
logger.info(f"Failed to update: {stats['failure_count']} files")
|
||||||
|
|
||||||
|
if stats['failure_count'] > 0:
|
||||||
|
logger.info("\nFailed files:")
|
||||||
|
for failed_file in stats['failure']:
|
||||||
|
logger.info(f" - {failed_file}")
|
||||||
|
|
||||||
# Cleanup temporary files unless --no-cleanup was specified
|
# Cleanup temporary files unless --no-cleanup was specified
|
||||||
if not args.no_cleanup:
|
if not args.no_cleanup:
|
||||||
|
|
Loading…
Add table
Reference in a new issue