V6 - with metadata connector
Write that metadata :-D
This commit is contained in:
parent
39e61b7f68
commit
4c18bd2cf9
1 changed file with 25 additions and 0 deletions
|
@ -12,6 +12,8 @@ import sys
|
|||
from tqdm import tqdm
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from metadata_writer import PDFMetadataWriter
|
||||
|
||||
|
||||
class PDFProcessor:
|
||||
def __init__(self, input_dir: str, output_dir: str, api_key: str, logger: logging.Logger = None):
|
||||
|
@ -351,6 +353,15 @@ Examples:
|
|||
action='store_true',
|
||||
help='Enable debug logging')
|
||||
|
||||
parser.add_argument('--write-metadata',
|
||||
action='store_true',
|
||||
help='Write extracted metadata back to PDF files')
|
||||
|
||||
parser.add_argument('--no-backup',
|
||||
action='store_true',
|
||||
help='Skip creating backups when writing metadata')
|
||||
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Validate directories
|
||||
|
@ -391,6 +402,20 @@ Examples:
|
|||
# Process files
|
||||
results = processor.process_pdfs()
|
||||
|
||||
if args.write_metadata:
|
||||
logger.info("Writing metadata back to PDF files...")
|
||||
writer = PDFMetadataWriter(logger)
|
||||
stats = writer.batch_write_metadata(results, backup=not args.no_backup)
|
||||
|
||||
logger.info("\nMetadata Writing Results:")
|
||||
logger.info(f"Successfully updated: {stats['success_count']} files")
|
||||
logger.info(f"Failed to update: {stats['failure_count']} files")
|
||||
|
||||
if stats['failure_count'] > 0:
|
||||
logger.info("\nFailed files:")
|
||||
for failed_file in stats['failure']:
|
||||
logger.info(f" - {failed_file}")
|
||||
|
||||
# Cleanup temporary files unless --no-cleanup was specified
|
||||
if not args.no_cleanup:
|
||||
logger.info("Cleaning up temporary files...")
|
||||
|
|
Loading…
Add table
Reference in a new issue