import html
import io
import string
import csv
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple, Union

import requests

from django.db import transaction
from django.core.management.base import BaseCommand, CommandParser, CommandError

from sok.models import (
    Author,
    Publication,
    PublicationAuthor,
    PublicationSource,
    PublicationTag,
    SearchTerm,
    Source,
    Tag,
)

# TODO: adapt for WoS?
PUBLICATIONS = {
    'article',
    'inproceedings',
    'proceedings',
    'book',
    'incollection',
    'phdthesis',
    'mastersthesis',
    'www',
    'person',
    'data',
}

CITE_KEY_PREFIX = 'Z:'

# CSV columns that `Command.handle` reads from every row.
_REQUIRED_COLUMNS = (
    'Key',
    'Title',
    'Publication Year',
    'Author',
    'Pages',
    'DOI',
    'Abstract Note',
    'Manual Tags',
    'Automatic Tags',
)


class Command(BaseCommand):
    """Import publications, authors and tags from a CSV export file.

    Each CSV row becomes one ``Publication``; its ``Author`` cell and its
    ``Manual Tags`` / ``Automatic Tags`` cells are split on ``'; '`` and
    linked to the publication. When ``--search-term`` is given, every
    imported publication is additionally linked to the source (``--source``,
    default ``Zotero``) under that search term.
    """

    def log_success(self, msg: str):
        """Write *msg* to stdout using the SUCCESS style."""
        self.stdout.write(self.style.SUCCESS(msg))

    def log_info(self, msg: str, nl: bool = True):
        """Write *msg* to stdout using the HTTP_INFO style and flush.

        A trailing newline is emitted only when *nl* is true.
        """
        self.stdout.write(self.style.HTTP_INFO(msg), ending='\n' if nl else '')
        self.stdout.flush()

    @staticmethod
    def _split_unique(raw: Optional[str]) -> List[str]:
        """Split a ``'; '``-separated cell into unique, non-empty values.

        First-seen order is preserved so that positions derived from the
        result are stable between runs (a ``set`` would scramble them).
        """
        return [value for value in dict.fromkeys((raw or '').split('; ')) if value]

    @staticmethod
    def _parse_pages(raw: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
        """Return ``(first_page, last_page)`` parsed from a ``first-last`` cell.

        Cells without a ``-`` yield ``(None, None)``. An empty half (as in
        ``"12-"``) is returned as ``None`` instead of an empty string.
        """
        raw = raw or ''
        if '-' not in raw:
            return None, None
        first, last = raw.split('-')[:2]
        return first.strip() or None, last.strip() or None

    # BaseCommand

    def add_arguments(self, parser: CommandParser):
        """Register ``--search-term``, ``--source`` and the positional ``zfile``."""
        parser.add_argument('--search-term', default=None)
        parser.add_argument('--source', default='Zotero')
        parser.add_argument('zfile')

    @transaction.atomic
    def handle(self, *args, **options):
        """Run the import inside a single database transaction.

        Raises:
            CommandError: if the CSV header lacks a column this command reads.
        """
        # get_or_create returns (object, created); keep only the object so
        # that the later PublicationSource lookup receives a Source instance.
        source, created = Source.objects.get_or_create(name=options['source'])
        if created:
            self.log_success(f"Created source: {source}")

        search_term: Optional[SearchTerm] = None
        if name := options['search_term']:
            search_term, created = SearchTerm.objects.get_or_create(name=name)
            if created:
                self.log_success(f"Created search term: {search_term}")

        publications: List[Publication] = []
        zotero_file = options['zfile']

        # newline='' is what the csv module expects for files it reads;
        # 'utf-8-sig' reads plain UTF-8 and also drops a leading BOM, which
        # would otherwise become part of the first column name.
        with open(zotero_file, 'r', encoding='utf-8-sig', newline='') as csvfile:
            reader = csv.DictReader(csvfile)

            present = reader.fieldnames or []
            missing = [column for column in _REQUIRED_COLUMNS if column not in present]
            if missing:
                raise CommandError(
                    f"'{zotero_file}' is missing required column(s): {', '.join(missing)}"
                )

            for publ in reader:
                authors: List[Author] = []
                for name in self._split_unique(publ['Author']):
                    author, created = Author.objects.get_or_create(name=name)
                    if created:
                        self.log_success(f"Added author: {author}")
                    else:
                        self.log_info(f"Author '{author}' already known")
                    authors.append(author)

                tags: List[Tag] = []
                tag_names = self._split_unique(publ['Manual Tags']) + self._split_unique(publ['Automatic Tags'])
                # A tag may appear in both columns; handle it only once.
                for tag_name in dict.fromkeys(tag_names):
                    tag, created = Tag.objects.get_or_create(name=tag_name)
                    if created:
                        self.log_success(f"Added tag: {tag}")
                    else:
                        self.log_info(f"Tag '{tag}' already exists")
                    tags.append(tag)

                first_page, last_page = self._parse_pages(publ['Pages'])

                # Add publication to database
                publication, created = Publication.objects.get_or_create(
                    cite_key=publ['Key'],
                    title=publ['Title'],
                    year=publ['Publication Year'],
                    # TODO: peer_reviewed=result.is_peer_reviewed,
                    first_page=first_page,
                    last_page=last_page,
                    doi=publ['DOI'] or None,
                    abstract=publ['Abstract Note'] or None,
                )
                if created:
                    self.log_success(f"Added publication: {publication}")
                else:
                    self.log_info(f"Publication '{publication}' already known")
                publications.append(publication)

                # Assign authors
                for position, author in enumerate(authors):
                    _, created = PublicationAuthor.objects.get_or_create(
                        author=author,
                        publication=publication,
                        position=position,
                    )
                    if created:
                        self.log_success(f"Assigned author '{author}' to publication '{publication}' at position {position}")
                    else:
                        self.log_info(f"Author '{author}' already assigned to publication '{publication}' at position '{position}'")

                # Assign tags
                for tag in tags:
                    _, created = PublicationTag.objects.get_or_create(
                        tag=tag,
                        publication=publication,
                    )
                    if created:
                        self.log_success(f"Assigned tag '{tag}' to publication '{publication}'")
                    else:
                        self.log_info(f"Tag '{tag}' already assigned to publication '{publication}'")

        # Assign sources
        if search_term is not None:
            for publication in publications:
                _, created = PublicationSource.objects.get_or_create(
                    source=source,
                    publication=publication,
                    search_term=search_term,
                )
                if created:
                    self.log_success(f"Assigned source '{source}' to publication '{publication}' with search term '{search_term}'")
                else:
                    self.log_info(f"Source '{source}' already assigned to publication '{publication}' with search term '{search_term}'")