Browse Source

Further improve snowballing

master
Maya Herrscher 2 months ago
parent
commit
854cdddad5
  1. 9
      sok/admin.py
  2. 17
      sok/management/commands/snowball.py

9
sok/admin.py

@@ -64,6 +64,7 @@ class PublicationStageFilter(admin.SimpleListFilter):
return (
('primary', _("primary")),
('secondary', _("secondary")),
('2-secondary', _("2-secondary")),
('tertiary', _("tertiary")),
('excluded', _("excluded")),
('-', _("-")),
@@ -95,6 +96,14 @@ class PublicationStageFilter(admin.SimpleListFilter):
referenced_by__sources__isnull=False,
)
if self.value() == '2-secondary':
ids: Set[int] = {
publication.id
for publication in queryset
if publication.stage == '2-secondary'
}
return queryset.filter(id__in=ids)
if self.value() == '-':
ids: Set[int] = {
publication.id

17
sok/management/commands/snowball.py

@@ -124,7 +124,7 @@ class Command(BaseCommand):
if created:
self.echo(f"Added author: {author}")
else:
self.echo(f"Author '{author}' alreay known")
self.echo(f"Author '{author}' already known")
authors.append(author)
cite_key = ''
if authors:
@@ -147,20 +147,20 @@ class Command(BaseCommand):
# Add publication to database
doi = data.get('doi', None)
if not publication:
self.echo(f"Will create now with cite key {cite_key}")
publication = Publication.objects.create(
cite_key=cite_key,
title=title,
year=data.get('year', 0),
peer_reviewed=None,
doi=doi,
abstract=data.get('abstract', None),
)
self.echo(f"Added publication: {publication}")
else:
self.echo(f"Publication '{publication}' already known")
# Assign authors
for position, author in enumerate(authors):
for position, author in enumerate(list(set(authors))):
publication_author, created = PublicationAuthor.objects.get_or_create(
author=author,
publication=publication,
@@ -242,11 +242,11 @@ class Command(BaseCommand):
if abstract := data.get('abstract', None):
self.echo(abstract)
elif choice in {'', 'n', 'no'}:
# TODO Import? copied and adapted from PR
# DONE Import? copied and adapted from PR
if paper_id is not None:
self.add_publ(paper_id, base, is_reference)
else:
self.echo("Could not add this paper, please do it manually!")
self.warn("Could not add this paper, please do it manually!")
break
# BaseCommand
@@ -255,11 +255,13 @@ class Command(BaseCommand):
parser.add_argument('--reset-choices', action='store_true')
parser.add_argument('--no-references', action='store_true')
parser.add_argument('--no-citations', action='store_true')
parser.add_argument('-s', '--stage', type=int, default=10000)
def handle(self, *args, **options):
reset_choices: bool = options['reset_choices']
no_citations: bool = options['no_citations']
no_references: bool = options['no_references']
stage: int = options['stage']
self.cache_path = Path('.choices.semanticscholar.pickle')
self.cache: Set[str] = set()
@@ -275,9 +277,12 @@ class Command(BaseCommand):
semanticscholar__isnull=False,
exclusion_criteria__isnull=True,
)
if stage < 10000:
publications = [p for p in publications if p.stage_added() == stage]
self.echo(f"==== {len(publications)} publications from stage {stage} will be shown ====")
try:
for publication in tqdm(publications, unit="publication"):
self.echo(f"=== Publication {publication} ===")
self.echo(f"=== Publication {publication}: {publication.title} ===")
for semantic in publication.semanticscholar_set.all():
data = semanticscholar(semantic.paper_id)

Loading…
Cancel
Save