Skip to content

Commit

Permalink
Added limit to queries
Browse files Browse the repository at this point in the history
  • Loading branch information
compscidr committed Apr 20, 2024
1 parent 8de3b9a commit 5bf55ba
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 14 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ go-scholar
# vendor/

.idea
scholar-example
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ Working:
* Queries each of the articles listed (up to 80) and parses the results for extra information
* Caches the profile for a day, and articles for a week (need to confirm this is working)
* This is in memory, so if the program is restarted, the cache is lost
* Configurable limit to number of articles to query in one go

## TODO:
* Configurable limit to number of articles to query in one go
* Pagination of articles
* Add throttling to avoid hitting the rate limit (figure out what the limit is)
* Cache the results of queries so we aren't hitting Google Scholar's servers every time (if we do too much we get a 429)
Expand Down
1 change: 0 additions & 1 deletion scholar-example/.gitignore

This file was deleted.

15 changes: 10 additions & 5 deletions scholar-example/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,25 @@ import (

func main() {
userPtr := flag.String("user", "", "user profile to retrieve")
limitPtr := flag.Int("limit", 1, "limit the number of articles to retrieve")
flag.Parse()

if *userPtr == "" {
flag.Usage()
return
}
if *limitPtr < 1 {
*limitPtr = 1
}

fmt.Println("Searching for user: " + *userPtr)
fmt.Println("Searching for user: " + *userPtr + " with limit: " + fmt.Sprint(*limitPtr))
user := *userPtr
limit := *limitPtr

sch := scholar.New()
//articles := sch.QueryProfileDumpResponse(user, true)
//articles := sch.QueryProfile(user)
articles := sch.QueryProfileWithCache(user)
//articles := sch.QueryProfileDumpResponse(user, limit, true)
//articles := sch.QueryProfile(user, limit)
articles := sch.QueryProfileWithCache(user, limit)

if len(articles) == 0 {
fmt.Println("Not found")
Expand All @@ -32,7 +37,7 @@ func main() {
fmt.Println(article)
}

cachedArticles := sch.QueryProfileWithCache(user)
cachedArticles := sch.QueryProfileWithCache(user, limit)
if len(articles) == 0 {
fmt.Println("Not found")
return
Expand Down
14 changes: 7 additions & 7 deletions scholar/scholar.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,18 +61,18 @@ func (a Article) String() string {
return "Article(\n title=" + a.title + "\n authors=" + a.authors + "\n scholarURL=" + a.scholarURL + "\n year=" + strconv.Itoa(a.year) + "\n month=" + strconv.Itoa(a.month) + "\n day=" + strconv.Itoa(a.day) + "\n numCitations=" + strconv.Itoa(a.numCitations) + "\n articles=" + strconv.Itoa(a.articles) + "\n description=" + a.description + "\n pdfURL=" + a.pdfURL + "\n journal=" + a.journal + "\n volume=" + a.volume + "\n pages=" + a.pages + "\n publisher=" + a.publisher + "\n scholarCitedByURL=" + strings.Join(a.scholarCitedByURLs, ", ") + "\n scholarVersionsURL=" + strings.Join(a.scholarVersionsURLs, ", ") + "\n scholarRelatedURL=" + strings.Join(a.scholarRelatedURLs, ", ") + "\n lastRetrieved=" + a.lastRetrieved.String() + "\n)"
}

func (sch Scholar) QueryProfile(user string) []Article {
return sch.QueryProfileDumpResponse(user, true, false)
// QueryProfile retrieves up to limit articles for the given user profile.
// It is a convenience wrapper that always queries the listed articles,
// bypasses the cache, and suppresses dumping of the raw HTTP response.
func (sch Scholar) QueryProfile(user string, limit int) []Article {
	// Named constants make the positional boolean arguments self-describing.
	const queryArticles, dumpResponse = true, false
	return sch.QueryProfileDumpResponse(user, queryArticles, limit, dumpResponse)
}

func (sch Scholar) QueryProfileWithCache(user string) []Article {
func (sch Scholar) QueryProfileWithCache(user string, limit int) []Article {
if sch.profile.Has(user) {
p, _ := sch.profile.Get(user)
lastAccess := p.lastRetrieved
if (time.Now().Sub(lastAccess)).Seconds() > MAX_TIME_PROFILE.Seconds() {
println("Profile cache expired for user: " + user)
sch.profile.Remove(user)
articles := sch.QueryProfileDumpResponse(user, true, false)
articles := sch.QueryProfileDumpResponse(user, true, limit, false)
var articleList []string
for _, article := range articles {
articleList = append(articleList, article.scholarURL)
Expand Down Expand Up @@ -107,7 +107,7 @@ func (sch Scholar) QueryProfileWithCache(user string) []Article {

} else {
println("Profile cache miss for user: " + user)
articles := sch.QueryProfileDumpResponse(user, true, false)
articles := sch.QueryProfileDumpResponse(user, true, limit, false)
var articleList []string
for _, article := range articles {
articleList = append(articleList, article.scholarURL)
Expand All @@ -127,12 +127,12 @@ func (sch Scholar) QueryProfileWithCache(user string) []Article {
// want to get updated information from the profile page only to save requests
//
// if dumpResponse is true, it will print the response to stdout (useful for debugging)
func (sch Scholar) QueryProfileDumpResponse(user string, queryArticles bool, dumpResponse bool) []Article {
func (sch Scholar) QueryProfileDumpResponse(user string, queryArticles bool, limit int, dumpResponse bool) []Article {
var articles []Article
client := &http.Client{}

// todo: make page size configurable, also support getting more than one page of citations
req, err := http.NewRequest("GET", BaseURL+"/citations?user="+user+"&cstart=0&pagesize=1", nil)
req, err := http.NewRequest("GET", BaseURL+"/citations?user="+user+"&cstart=0&pagesize="+strconv.Itoa(limit), nil)
if err != nil {
log.Fatalln(err)
}
Expand Down

0 comments on commit 5bf55ba

Please sign in to comment.