Browse Source

Add some things

master
Maya Herrscher 1 month ago
parent
commit
fa4981693c
  1. 18
      sok/management/commands/repair.py
  2. 19
      sok/management/commands/stats.py

18
sok/management/commands/repair.py

@ -92,9 +92,22 @@ class Command(BaseCommand):
def search_on_dblp(self, publication: Publication):
query, results, total = dblp.PublicationResult.from_search(publication.title, 100)
if total == 0: return
plausible = []
for result in results:
if publication.doi and result.doi:
if publication.doi.lower() == result.doi.lower():
plausible = [result]
break
else: continue # quite definitely not the same publication
elif publication.title == result.title and publication.year == result.year:
plausible.append(result)
self.log_warn(f"Not quite certain about {result.cite_key} for publication {publication.title}")
else: continue # I'd rather look at that in detail for now
if len(plausible) == 1:
result = plausible[0]
if result.access.lower() == 'withdrawn':
self.log_warn(f"Publication {publication.title} was WITHDRAWN!")
return
publication.cite_key = result.cite_key
publication.year = result.year
if not result.is_peer_reviewed == None:
@ -150,11 +163,6 @@ class Command(BaseCommand):
else:
self.log_info(f"Author '{author}' already assigned to publication '{publication}' at position '{position}'")
else: continue # quite definitely not the same publication
elif publication.title == result.title and publication.year == result.year:
self.log_warn(f"Not quite certain about {result.cite_key} for publication {publication.title}")
else: continue # I'd rather look at that in detail for now
def find_secondary_on_dblp(self):
self.log_info("--- Searching for snowballed sources on DBLP ---")
for publication in tqdm(Publication.objects.exclude(cite_key__startswith=dblp.CITE_KEY_PREFIX), unit="publication"):

19
sok/management/commands/stats.py

@ -1,4 +1,8 @@
from typing import Set
import pickle
from django.db.models import Count
from pathlib import Path
from django.core.management.base import BaseCommand
from django.db.models import Count, Q
@ -43,7 +47,22 @@ class Command(BaseCommand):
).distinct():
publications_relevant.add(publication.cite_key)
self.cache_path = Path('.choices.semanticscholar.pickle')
if self.cache_path.exists():
self.echo("Loading choices from snowballing...")
with self.cache_path.open('rb') as f:
self.cache = pickle.load(f)
self.echo(f"{len(self.cache)} publications excluded during snowballing")
self.echo(f"{len(Publication.objects.filter(variant_of__isnull=True))} publications included in total so far")
# Output
self.echo(f"Total publications: {len(publications_found):4d}", bold=True)
self.echo(f"- peer reviewed: {len(publications_peer_reviewed):4d}", bold=True)
self.echo(f"- relevant: {len(publications_relevant):4d}", bold=True)
pubs = Publication.objects.values('title').annotate(count=Count('referenced_by')).values('title', 'year', 'count').order_by('count').reverse()
for p in pubs[:15]: print(f"{p['title']} ({p['year']}): {p['count']} citations")
years = Publication.objects.values('year').annotate(count=Count('year')).values('year', 'count')
for y in years: print(f"{y['year']}, {y['count']}")

Loading…
Cancel
Save