Browse Source

CSV import for Zotero files

webofscience
Maya Herrscher 1 week ago
parent
commit
0bc652f4d7
  1. 153
      sok/management/commands/zimport.py
  2. 6
      sokman/settings.py

153
sok/management/commands/zimport.py

@ -0,0 +1,153 @@
import html
import io
import string
import csv
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple, Union
import requests
from django.db import transaction
from django.core.management.base import BaseCommand, CommandParser, CommandError
from sok.models import (
Author,
Publication,
PublicationAuthor,
PublicationSource,
PublicationTag,
SearchTerm,
Source,
Tag,
)
# Set of record-type names regarded as publications.
# NOTE(review): not referenced by the visible code of this command; the names
# look like bibliography record types carried over from another importer --
# confirm whether it is still needed.
# TODO: adapt for WoS? (presumably Web of Science -- confirm)
PUBLICATIONS = {
    'article', 'inproceedings', 'proceedings',
    'book', 'incollection',
    'phdthesis', 'mastersthesis',
    'www', 'person', 'data',
}

# Prefix string for cite keys.
# NOTE(review): not referenced by the visible code; `handle` stores the raw
# 'Key' column as cite_key -- confirm whether the prefix should be applied.
CITE_KEY_PREFIX = 'Z:'
class Command(BaseCommand):
    """Import publications from a Zotero CSV export.

    Each CSV row is turned into a ``Publication`` together with its authors
    and tags. When ``--search-term`` is given, every imported publication is
    additionally linked to the source (``--source``) under that search term.
    The whole import runs inside one database transaction.
    """

    def log_success(self, msg: str):
        """Write ``msg`` to stdout using the SUCCESS style."""
        self.stdout.write(self.style.SUCCESS(msg))

    def log_info(self, msg: str, nl: bool = True):
        """Write ``msg`` to stdout using the HTTP_INFO style and flush.

        A trailing newline is emitted only when ``nl`` is true.
        """
        self.stdout.write(self.style.HTTP_INFO(msg), ending='\n' if nl else '')
        self.stdout.flush()

    # Helpers

    @staticmethod
    def _split_unique(raw: Optional[str]) -> List[str]:
        """Split a '; '-separated CSV cell into unique, non-empty entries.

        The order of first occurrence is preserved. A missing (``None``) or
        empty cell yields an empty list.
        """
        if not raw:
            return []
        # dict.fromkeys de-duplicates while keeping insertion order, unlike set().
        return [item for item in dict.fromkeys(raw.split('; ')) if item != '']

    @staticmethod
    def _parse_pages(raw: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
        """Return ``(first_page, last_page)`` parsed from a 'Pages' cell.

        Only ranges (values containing a dash) are parsed; anything else
        yields ``(None, None)``. Empty range ends become ``None``.
        """
        if not raw:
            return (None, None)
        # Treat an en dash like a plain hyphen so "12\u201334" is parsed as a range.
        normalized = raw.replace('\u2013', '-')
        if '-' not in normalized:
            return (None, None)
        parts = normalized.split('-')
        return (parts[0].strip() or None, parts[1].strip() or None)

    # BaseCommand

    def add_arguments(self, parser: CommandParser):
        """Register the command line arguments.

        ``--search-term`` (optional), ``--source`` (defaults to 'Zotero') and
        the positional ``zfile``, the path of the CSV file to import.
        """
        parser.add_argument('--search-term', default=None)
        parser.add_argument('--source', default='Zotero')
        parser.add_argument('zfile')

    @transaction.atomic
    def handle(self, *args, **options):
        """Read the CSV file and create the corresponding database rows.

        Author positions follow the order of the 'Author' column. Source
        links are only created when a search term was supplied.
        """
        # get_or_create returns an (object, created) tuple; unpack it so that
        # `source` is the model instance rather than the tuple.
        source, created = Source.objects.get_or_create(name=options['source'])
        if created:
            self.log_success(f"Created source: {source}")

        search_term: Optional[SearchTerm] = None
        if name := options['search_term']:
            search_term, created = SearchTerm.objects.get_or_create(name=name)
            if created:
                self.log_success(f"Created search term: {search_term}")

        publications: List[Publication] = []
        zotero_file = options['zfile']
        # newline='' is what the csv module expects for files it parses;
        # 'utf-8-sig' reads plain UTF-8 and also drops a leading BOM, which
        # would otherwise end up in the first header name.
        with open(zotero_file, 'r', encoding='utf-8-sig', newline='') as csvfile:
            for publ in csv.DictReader(csvfile):
                # Authors, in the order given by the CSV row
                authors: List[Author] = []
                for name in self._split_unique(publ['Author']):
                    author, created = Author.objects.get_or_create(name=name)
                    if created:
                        self.log_success(f"Added author: {author}")
                    else:
                        self.log_info(f"Author '{author}' already known")
                    authors.append(author)

                # Manual and automatic tags, merged without duplicates
                tag_names = list(dict.fromkeys(
                    self._split_unique(publ['Manual Tags'])
                    + self._split_unique(publ['Automatic Tags'])
                ))
                tags: List[Tag] = []
                for tag_name in tag_names:
                    tag, created = Tag.objects.get_or_create(name=tag_name)
                    if created:
                        self.log_success(f"Added tag: {tag}")
                    else:
                        self.log_info(f"Tag '{tag}' already exists")
                    tags.append(tag)

                first_page, last_page = self._parse_pages(publ['Pages'])

                # Add publication to database
                publication, created = Publication.objects.get_or_create(
                    cite_key=publ['Key'],
                    title=publ['Title'],
                    year=publ['Publication Year'],
                    # TODO: peer_reviewed=result.is_peer_reviewed,
                    first_page=first_page,
                    last_page=last_page,
                    doi=publ['DOI'] or None,
                    abstract=publ['Abstract Note'] or None,
                )
                if created:
                    self.log_success(f"Added publication: {publication}")
                else:
                    self.log_info(f"Publication '{publication}' already known")
                publications.append(publication)

                # Assign authors
                for position, author in enumerate(authors):
                    _, created = PublicationAuthor.objects.get_or_create(
                        author=author,
                        publication=publication,
                        position=position,
                    )
                    if created:
                        self.log_success(f"Assigned author '{author}' to publication '{publication}' at position {position}")
                    else:
                        self.log_info(f"Author '{author}' already assigned to publication '{publication}' at position '{position}'")

                # Assign tags
                for tag in tags:
                    _, created = PublicationTag.objects.get_or_create(
                        tag=tag,
                        publication=publication,
                    )
                    if created:
                        self.log_success(f"Assigned tag '{tag}' to publication '{publication}'")
                    else:
                        self.log_info(f"Tag '{tag}' already assigned to publication '{publication}'")

        # Assign sources (skipped entirely when no search term was given)
        if search_term is not None:
            for publication in publications:
                _, created = PublicationSource.objects.get_or_create(
                    source=source,
                    publication=publication,
                    search_term=search_term,
                )
                if created:
                    self.log_success(f"Assigned source '{source}' to publication '{publication}' with search term '{search_term}'")
                else:
                    self.log_info(f"Source '{source}' already assigned to publication '{publication}' with search term '{search_term}'")

6
sokman/settings.py

@ -89,9 +89,13 @@ WSGI_APPLICATION = 'sokman.wsgi.application'
# Database connections, both SQLite files located in BASE_DIR:
# 'default' uses jens.sqlite3 and 'uc_sok' uses db.sqlite3.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': BASE_DIR / 'jens.sqlite3',
    },
    'uc_sok': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': BASE_DIR / 'db.sqlite3',
    },
}

Loading…
Cancel
Save