2024-01-17 22:02:56 +00:00
"""
Process logs derived from social cataloguing site data exports , using various APIs .
"""
2024-01-14 14:00:07 +00:00
import json
import os
2024-01-14 15:11:01 +00:00
import re
2024-01-14 14:00:07 +00:00
import time
2024-01-17 22:02:56 +00:00
import requests
2024-05-05 09:51:22 +00:00
from slugify import slugify
2024-01-17 22:02:56 +00:00
from dotenv import load_dotenv
2024-01-17 19:23:35 +00:00
from add_item import cleanup_result , import_by_id , setup_logger
2024-01-14 14:00:07 +00:00
2024-01-17 21:17:29 +00:00
logger = setup_logger("process_logs")

load_dotenv()

TMDB_API_KEY = os.getenv("TMDB_API_KEY")
TVDB_API_KEY = os.getenv("TVDB_API_KEY")

# os.getenv returns None (not "") when a variable is unset, so a comparison
# against the empty string never detected a missing key. Check falsiness
# instead, which covers both None and an explicitly empty value.
if not TMDB_API_KEY:
    logger.warning("TMDB API key not found")
if not TVDB_API_KEY:
    logger.warning("TVDB API key not found")
2024-01-14 15:11:01 +00:00
2024-01-14 14:00:07 +00:00
2024-01-17 22:02:56 +00:00
def process_log(media_type, log) -> None:
    """Run through a log and call the appropriate API for each item found.

    Loads ./data/<media_type>/<log>.json, tries to resolve every item that is
    missing its service ID (via IMDB ID, OpenLibrary ID, ISBN, or a
    title/author search), periodically saves progress, and writes the updated
    log back to disk.
    """
    logger.info(f"Processing {media_type}/{log}…")

    with open(f"./data/{media_type}/{log}.json", "r", encoding="utf-8") as log_file:
        log_items = json.load(log_file)

    # NOTE(review): this dict is shared across loop iterations, so values set
    # for one item can leak into the next if that item lacks the same field —
    # confirm whether per-item reset is intended.
    log_item_values = {}

    # Key whose absence marks an item as not-yet-imported for this media type.
    id_key = ""
    if "books" == media_type:
        id_key = "ol_id"
    elif media_type in ["films", "tv-series", "tv-episodes"]:
        id_key = "tmdb_id"
    elif "games" == media_type:
        id_key = "gb_id"

    for i, item in enumerate(log_items):
        if id_key not in item:  # and "skip" not in item:
            # Exports from different sources use different title keys.
            if media_type in ["films", "books"]:
                item_title = item["Title"] if "Title" in item else item["title"]
            elif "tv-episodes" == media_type:
                item_title = item["Episode Title"]
            elif "tv-series" == media_type:
                item_title = item["Show Title"]

            logger.info(
                f"Processing {item_title} "
                f"({item['Author'] if 'Author' in item else item['authors'] if 'authors' in item else 'No Author'})…"
            )

            # Rename pre-existing fields
            if "Date Added" in item:
                log_item_values["date_added"] = item["Date Added"]
                del item["Date Added"]
            if "date_added" in item:
                log_item_values["date_added"] = item["date_added"]
            if "Date Started" in item:
                log_item_values["date_started"] = item["Date Started"]
                del item["Date Started"]
            if "date_started" in item:
                log_item_values["date_started"] = item["date_started"]
            if "Date Finished" in item:
                log_item_values["date_finished"] = item["Date Finished"]
                del item["Date Finished"]
            if "Date Read" in item:
                # Fixed: "Date Finished" was deleted from the item just above,
                # so compare against the captured value, not the deleted key
                # (previously a guaranteed KeyError whenever both were set).
                if log_item_values.get("date_finished") == item["Date Read"]:
                    del item["Date Read"]
                else:
                    raise Exception(
                        f"'Date Read' != 'Date Finished' for {item['Title']}"
                    )

            if "date_finished" in item:
                log_item_values["date_finished"] = item["date_finished"]

            if "Read Count" in item:
                log_item_values["read_count"] = item["Read Count"]
                del item["Read Count"]
            if "read_count" in item:
                log_item_values["read_count"] = item["read_count"]
            if "Date Watched" in item:
                log_item_values["date_finished"] = item["Date Watched"]
                del item["Date Watched"]
            if "Rewatch" in item:
                log_item_values["is_repeat"] = item["Rewatch"]
                del item["Rewatch"]
            if "Comments" in item:
                log_item_values["comments"] = item["Comments"]
                del item["Comments"]
            if "Series Title" in item:
                log_item_values["series_title"] = item["Series Title"]
                del item["Series Title"]
            if "Episode Title" in item:
                log_item_values["name"] = item["Episode Title"]
                del item["Episode Title"]

            if "Episode Number" in item:
                # Fixed: this block previously read log_item_values
                # ["episode_number"] before it was ever assigned (KeyError)
                # and 3-way-unpacked a 2-element split() result (ValueError).
                episode_number = item["Episode Number"]
                if re.search("[0-9]+x[0-9]+", episode_number) is not None:
                    # e.g. "3x07"
                    season_no, _, episode_no = episode_number.partition("x")
                elif re.search("S[0-9]+E[0-9]+", episode_number) is not None:
                    # e.g. "S03E07" — drop the leading "S", split on "E"
                    season_no, _, episode_no = episode_number[1:].partition("E")
                elif re.search("E[0-9]+", episode_number) is not None:
                    # e.g. "E07" — no season information available
                    season_no = None
                    episode_no = episode_number[1:]
                else:
                    logger.error(
                        f"Invalid episode number format '{episode_number}'"
                    )
                    return
                log_item_values["season_number"] = season_no
                log_item_values["episode_number"] = episode_no
                del item["Episode Number"]

            # Resolve the item through the most specific identifier available.
            if "IMDB ID" in item and item["IMDB ID"] != "":
                new_log_item = import_by_id(item["IMDB ID"], media_type)

            elif "books" == media_type and "wishlist" == log:
                new_log_item = import_by_details(item, item_title, media_type)
                if new_log_item is None:
                    ol_work_id = input(
                        f"Enter OpenLibrary Work ID for '{item_title}' "
                        f"({item['Author'] if 'Author' in item else item['authors'] if 'authors' in item else 'No Author'}), "
                        f"or 'd' to delete the record: "
                    )
                    if "d" == ol_work_id:
                        logger.info("Deleting…")
                        # NOTE(review): deleting during enumerate() shifts the
                        # remaining indices and skips the next entry — confirm
                        # this is acceptable for interactive cleanup.
                        del log_items[i]
                        continue
                    ol_work_id = re.search("OL[0-9]+W", ol_work_id)

                    try:
                        new_log_item = import_by_id(ol_work_id[0], media_type, log)
                    # Narrowed from a bare `except:` so that e.g.
                    # KeyboardInterrupt still propagates.
                    except Exception:
                        new_log_item = item
                        new_log_item["skip"] = True
                        logger.info("Skipping…")

            elif (
                "ISBN13" in item
                and item["ISBN13"] != ""
                and item["ISBN13"] is not None
            ):
                new_log_item = import_by_id(item["ISBN13"], media_type, log)

            elif "ISBN" in item and item["ISBN"] != "" and item["ISBN"] is not None:
                # Fixed: this branch previously looked up item["ISBN13"],
                # which cannot be valid here (the branch above already
                # handled a usable ISBN13).
                new_log_item = import_by_id(item["ISBN"], media_type, log)

            else:
                new_log_item = import_by_details(item, item_title, media_type)

            if new_log_item is None:
                if (
                    media_type in ["films", "tv-series", "tv-episodes"]
                    and "imdb_id" not in item
                ):
                    item["imdb_id"] = input(f"Enter IMDB ID for {item_title}: ")
                    if re.search("tt[0-9]+", item["imdb_id"]) is not None:
                        log_items[i] = import_by_id(item["imdb_id"], media_type)
                        with open(
                            f"./data/{media_type}/{log}.json", "w", encoding="utf-8"
                        ) as log_file:
                            json.dump(log_items, log_file, indent=4)

                elif "books" == media_type:
                    if "ISBN" not in item and "ISBN13" not in item:
                        item["ISBN"] = input(f"Enter ISBN for {item_title}: ")
                        if re.search("[0-9-]+", item["ISBN"]) is not None:
                            log_items[i] = import_by_id(item["ISBN"], media_type)
                            with open(
                                f"./data/{media_type}/{log}.json",
                                "w",
                                encoding="utf-8",
                            ) as log_file:
                                json.dump(log_items, log_file, indent=4)
                        else:
                            logger.warning(f"Skipped '{item_title}'")
                            log_items[i]["skip"] = True

                else:
                    logger.warning(f"Skipped {item_title}")

            else:
                log_items[i] = new_log_item

            # Checkpoint progress every few items so an interactive session
            # can be interrupted without losing everything.
            if i % 3 == 0:
                with open(
                    f"./data/{media_type}/{log}.json", "w", encoding="utf-8"
                ) as log_file:
                    json.dump(log_items, log_file, indent=4)
                logger.info("Saved…")

            if log_items[i] is not None:
                log_items[i] |= log_item_values

    with open(f"./data/{media_type}/{log}.json", "w", encoding="utf-8") as log_file:
        json.dump(log_items, log_file, indent=4)

    logger.info(f"Finished processing {media_type}/{log}")
2024-01-14 14:00:07 +00:00
2024-01-17 22:02:56 +00:00
def import_by_details(item, item_title, media_type) -> dict:
    """Import an item when lacking a unique identifier"""
    # Media types with no detail-based importer wired up yet:
    # import_from_tvdb_by_details / import_from_igdb_by_details.
    if media_type in ["tv-episodes", "games"]:
        return None

    if media_type in ["films", "tv-series"]:
        return import_from_tmdb_by_details(item, item_title, media_type)

    if media_type in ["books"]:
        return import_from_openlibrary_by_details(item, item_title, media_type)

    return None
2024-01-15 21:43:23 +00:00
2024-01-14 14:00:07 +00:00
2024-05-05 09:51:22 +00:00
def _import_from_ol_key(result_key):
    """Parse an OpenLibrary/IMDB-style ID out of a result key and import it.

    Returns the imported item, or None when no ID could be parsed from the key.
    """
    item_id_parsed = re.search("(OL|tt)?[0-9]+[WMA]?", result_key)
    if item_id_parsed is not None:
        return import_by_id(item_id_parsed[0], "books", "wishlist")
    return None


def import_from_openlibrary_by_details(item, item_title, media_type) -> dict | None:
    """Retrieve a book from OpenLibrary using a title and author name"""
    logger.info(f"Importing '{item_title}'…")

    # Exports from different sources use different key casings, so fall back
    # from "Title"/"Author" to "title"/"authors". The title is truncated at
    # the first colon to drop subtitles before searching.
    title = (item["Title"] if "Title" in item else item["title"]).split(":")[0]
    author = (
        item["Author"]
        if "Author" in item
        else item["authors"] if "authors" in item else "No Author"
    )
    api_url = (
        f"https://openlibrary.org/search.json"
        f"?title={slugify(title, separator='%20')}"
        f"&author={slugify(author, separator='%20')}"
    )

    # Sending API request
    response = requests.get(api_url, headers={"accept": "application/json"}, timeout=15)

    # Process the response
    if 200 == response.status_code:
        logger.debug(response.status_code)
    elif 429 == response.status_code:
        # Rate-limited: back off briefly, then retry.
        time.sleep(2)
        return import_from_openlibrary_by_details(item, item_title, media_type)
    elif 404 == response.status_code:
        logger.error(f"{response.status_code}: Not Found for title '{item_title}'")
        return None
    else:
        raise Exception(f"Error {response.status_code}: {response.text}")

    results = json.loads(response.text)
    logger.info(
        f"Found {results['num_found']} result{'s' if results['num_found'] != 1 else ''}…"
    )

    if 0 < results["num_found"]:
        result = results["docs"][0]
        if 1 == results["num_found"]:
            # Single hit: accept it outright.
            logger.info(f"Selecting OL ID {result['key']}…")
            imported = _import_from_ol_key(result["key"])
            if imported is not None:
                return imported
        else:
            # Fixed: compare using the same key fallbacks as the query above
            # instead of assuming "Title"/"Author" exist (KeyError otherwise).
            if result["title"] == title and result["author_name"][0] == author:
                logger.info(f"First result ({result['key']}) is a match!")
                imported = _import_from_ol_key(result["key"])
                if imported is not None:
                    return imported
            else:
                # Ambiguous: show a summary and ask the user to confirm.
                print(
                    json.dumps(
                        {
                            k: result.get(k, None)
                            for k in ("author", "title", "first_publish_year")
                        },
                        indent=4,
                    )
                )
                is_correct = input("Is this the correct result? [y/n]: ")
                if "y" == is_correct:
                    logger.info(f"Selecting OL ID {result['key']}…")
                    imported = _import_from_ol_key(result["key"])
                    if imported is not None:
                        return imported

    logger.info("Returning nothing…")
    return None
2024-01-17 22:02:56 +00:00
def import_from_tmdb_by_details(item, item_title, media_type) -> dict:
    """Retrieve a film or TV series from TMDB using its title.

    Falls back to interactive selection (result index, then IMDB ID) when the
    search is ambiguous; returns the item unchanged when nothing is resolved.
    Only called for "films" and "tv-series" media types.
    """
    logger.info(f"Processing {item_title}…")

    api_url = f"https://api.themoviedb.org/3/search/{'movie' if 'films' == media_type else 'tv'}"

    # Sending API request
    response = requests.get(
        api_url,
        params={
            "query": item_title,
            "include_adult": True,
            "year": item["Release Year"] if "Release Year" in item else None,
        },
        headers={"Authorization": f"Bearer {TMDB_API_KEY}"},
        timeout=15,
    )

    # Process the response
    if 200 == response.status_code:
        logger.debug(response.status_code)
        response_data = json.loads(response.text)["results"]
    elif 429 == response.status_code:
        # Rate-limited: back off briefly, then retry.
        time.sleep(2)
        return import_from_tmdb_by_details(item, item_title, media_type)
    else:
        # Fixed: previously execution fell through and tried to parse the
        # error body as search results, crashing on non-JSON responses.
        # Treat an error response as "no results" and use the manual path.
        logger.error(response.text)
        response_data = []

    if 1 == len(response_data):
        return cleanup_result(response_data[0], media_type)

    if 0 == len(response_data):
        logger.warning(f"Returned no {media_type} for {item_title}")

    elif 1 < len(response_data):
        # TMDB uses different title fields for movies and TV.
        if "films" == media_type:
            title_key = "title"
        elif "tv-series" == media_type:
            title_key = "name"

        filtered_response_data = [
            result for result in response_data if result[title_key] == item_title
        ]
        frd_len = len(filtered_response_data)

        if 1 == frd_len:
            # Fixed: previously returned response_data[0], i.e. the first raw
            # result rather than the single exact-title match.
            return cleanup_result(filtered_response_data[0], media_type)

        logger.warning(f"Returned more than one {media_type} for '{item_title}':\n")

        # The list shown to the user is the one that must be indexed below.
        candidates = filtered_response_data if frd_len > 0 else response_data
        print(json.dumps(candidates, indent=4))

        last_index = len(candidates) - 1
        idx = input(f"\nEnter the index of the result to use [0-{last_index}]: ")

        if "" != idx:
            try:
                # Fixed: previously indexed the unfiltered response_data even
                # when the filtered list had been displayed.
                return cleanup_result(candidates[int(idx)], media_type)
            except Exception as exc:
                raise Exception("Index invalid") from exc

    item["IMDB ID"] = input(f"Enter IMDB ID for {item_title}: ")

    if "" != item["IMDB ID"]:
        return import_by_id(item["IMDB ID"], media_type)

    logger.warning(f"Skipped {media_type} '{item_title}'")
    return item
def main() -> None:
    """Prompt user to select media type and log to process"""
    media_type = ""
    while media_type not in ["films", "tv-episodes", "tv-series", "books"]:
        media_type = input("Select media type [films|tv-episodes|tv-series|books]: ")

    try:
        if "films" == media_type:
            log = ""
            while log not in ["log", "wishlist"]:
                log = input("Enter log to process [log|wishlist]: ")

        elif "books" == media_type:
            log = ""
            while log not in ["log", "current", "wishlist"]:
                log = input("Enter log to process [log|current|wishlist]: ")

        elif "tv-series" == media_type:
            log = "log"

        # Fixed: this branch previously duplicated "tv-series", making it
        # unreachable and leaving `log` unbound for tv-episodes (a NameError
        # silently swallowed by the except below).
        elif "tv-episodes" == media_type:
            log = ""
            while log not in ["log", "current", "wishlist"]:
                log = input("Enter log to process [log|current|wishlist]: ")

        process_log(media_type, log)

    except Exception:
        logger.exception("Exception occurred")
2024-01-14 14:00:07 +00:00
2024-01-17 19:23:35 +00:00
# Allow running this module directly as an interactive script.
if __name__ == "__main__":
    main()