Basically a fork of https://github.com/blochberger/sokman, with the intention of also adding a visual interface.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

68 lines
2.3 KiB

from typing import Set
import pickle
from django.db.models import Count
from pathlib import Path
from django.core.management.base import BaseCommand
from django.db.models import Count, Q
import sok.management.commands.dblpimport as dblp
from sok.models import Publication, SearchTerm
class Command(BaseCommand):
    """Management command that prints statistics about the collected publications.

    The report covers, in order: DBLP search hits per stored search term
    (total, peer reviewed, relevant), the cached snowballing choices, the
    most referenced publications, and the number of publications per year.
    """

    def echo(self, msg: str, bold=True):
        """Write ``msg`` to the command's stdout.

        Args:
            msg: The text to write.
            bold: When true, the text is passed through the ``HTTP_INFO``
                style of the command before it is written.
        """
        if bold:
            msg = self.style.HTTP_INFO(msg)
        self.stdout.write(msg)

    def _report_snowballing(self):
        """Report the cached snowballing choices and the current inclusion count."""
        # Relative path: it is resolved against the current working directory.
        self.cache_path = Path('.choices.semanticscholar.pickle')
        if self.cache_path.exists():
            self.echo("Loading choices from snowballing...")
            # NOTE(security): unpickling can execute arbitrary code. This file
            # must only ever be a cache written locally by this project.
            with self.cache_path.open('rb') as f:
                self.cache = pickle.load(f)
            # Reported only when a cache was loaded; `self.cache` is not set
            # otherwise.
            self.echo(f"{len(self.cache)} publications excluded during snowballing")

        # Let the database count instead of loading every row to call len().
        included = Publication.objects.filter(variant_of__isnull=True).count()
        self.echo(f"{included} publications included in total so far")

    def _report_citations(self):
        """Print the 15 most referenced publications and the publications per year."""
        most_cited = (
            Publication.objects
            .values('title')
            .annotate(count=Count('referenced_by'))
            .values('title', 'year', 'count')
            .order_by('-count')
        )
        # Written through self.stdout (unstyled) so that the output can be
        # captured or redirected like the rest of the report.
        for p in most_cited[:15]:
            self.echo(f"{p['title']} ({p['year']}): {p['count']} citations", bold=False)

        per_year = (
            Publication.objects
            .values('year')
            .annotate(count=Count('year'))
            .values('year', 'count')
        )
        for y in per_year:
            self.echo(f"{y['year']}, {y['count']}", bold=False)

    # BaseCommand

    def handle(self, *args, **options):
        """Collect all statistics and write them to the command's stdout."""
        publications_found: Set[str] = set()
        publications_peer_reviewed: Set[str] = set()
        publications_relevant: Set[str] = set()

        self.echo("Loading DBLP dump...")
        all_cite_keys = dblp.get_all_cite_keys(dblp.DUMP_PATH)

        for search_term in SearchTerm.objects.all():
            # DBLP search result
            self.echo(f"Searching DBLP for '{search_term}'")
            # NOTE(review): 1000 is presumably the maximum number of results
            # requested -- confirm against dblpimport.
            _query, results, _total = dblp.PublicationResult.from_search(search_term.name, 1000)
            for result in results:
                # Skip hits whose cite key is not among the keys of the dump.
                if result.cite_key not in all_cite_keys:
                    continue
                publications_found.add(result.cite_key)
                if result.is_peer_reviewed:
                    publications_peer_reviewed.add(result.cite_key)

            # Relevant publications: linked to this search term and without
            # any exclusion criteria.
            relevant = Publication.objects.filter(
                publicationsource__search_term=search_term,
                exclusion_criteria__isnull=True,
            ).distinct()
            publications_relevant.update(publication.cite_key for publication in relevant)

        self._report_snowballing()

        # Output
        self.echo(f"Total publications: {len(publications_found):4d}", bold=True)
        self.echo(f"- peer reviewed: {len(publications_peer_reviewed):4d}", bold=True)
        self.echo(f"- relevant: {len(publications_relevant):4d}", bold=True)

        self._report_citations()