#!/usr/bin/env python3 """ AMiner 开放平台 API 客户端 支持 6 大学术数据查询工作流及全部 28 个独立 API 使用方法: python aminer_client.py --token --action [选项] 工作流: scholar_profile 学者全景分析(搜索→详情+画像+论文+专利+项目) paper_deep_dive 论文深度挖掘(搜索→详情+引用链) org_analysis 机构研究力分析(消歧→详情+学者+论文+专利) venue_papers 期刊论文监控(搜索→详情+按年份论文) paper_qa 学术智能问答(AI驱动关键词搜索) patent_search 专利搜索与详情 scholar_patents 通过学者名获取其所有专利详情 直接调用单个 API: raw 直接调用任意 API,需指定 --api 和 --params 控制台(生成Token):https://open.aminer.cn/open/board?tab=control 文档:https://open.aminer.cn/open/doc """ import argparse import json import sys import time import random import urllib.request import urllib.error import urllib.parse from typing import Any, Optional BASE_URL = "https://datacenter.aminer.cn/gateway/open_platform" TEST_TOKEN = "" # 请前往 https://open.aminer.cn/open/board?tab=control 生成你自己的 Token REQUEST_TIMEOUT_SECONDS = 30 MAX_RETRIES = 3 RETRYABLE_HTTP_STATUS = {408, 429, 500, 502, 503, 504} # ────────────────────────────────────────────────────────────────────────────── # 核心 HTTP 工具 # ────────────────────────────────────────────────────────────────────────────── def _request(token: str, method: str, path: str, params: Optional[dict] = None, body: Optional[dict] = None) -> Any: """发送 HTTP 请求并返回解析后的 JSON 数据(含重试)。""" url = BASE_URL + path headers = { "Authorization": token, "Content-Type": "application/json;charset=utf-8", } if method.upper() == "GET" and params: query = urllib.parse.urlencode( {k: (json.dumps(v) if isinstance(v, (list, dict)) else v) for k, v in params.items() if v is not None} ) url = f"{url}?{query}" data = json.dumps(body).encode("utf-8") if body else None req = urllib.request.Request(url, data=data, headers=headers, method=method.upper()) for attempt in range(1, MAX_RETRIES + 1): try: with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT_SECONDS) as resp: raw = resp.read().decode("utf-8") return json.loads(raw) except urllib.error.HTTPError as e: body_bytes = e.read() try: err = json.loads(body_bytes) except Exception: err = body_bytes.decode("utf-8", errors="replace") retryable = e.code in RETRYABLE_HTTP_STATUS print(f"[HTTP {e.code}] {e.reason}: {err}", file=sys.stderr) if retryable and attempt < MAX_RETRIES: backoff = (2 ** (attempt - 1)) + random.uniform(0, 0.3) print(f"[重试] attempt={attempt}/{MAX_RETRIES} wait={backoff:.2f}s", file=sys.stderr) time.sleep(backoff) continue return { "code": e.code, "success": False, "msg": str(e.reason), "error": err, "retryable": retryable, } except urllib.error.URLError as e: reason = str(getattr(e, "reason", e)) print(f"[请求失败] {reason}", file=sys.stderr) if attempt < MAX_RETRIES: backoff = (2 ** (attempt - 1)) + random.uniform(0, 0.3) print(f"[重试] attempt={attempt}/{MAX_RETRIES} wait={backoff:.2f}s", file=sys.stderr) time.sleep(backoff) continue return { "code": -1, "success": False, "msg": "network_error", "error": reason, "retryable": True, } except TimeoutError as e: print(f"[请求超时] {e}", file=sys.stderr) if attempt < MAX_RETRIES: backoff = (2 ** (attempt - 1)) + random.uniform(0, 0.3) print(f"[重试] attempt={attempt}/{MAX_RETRIES} wait={backoff:.2f}s", file=sys.stderr) time.sleep(backoff) continue return { "code": -1, "success": False, "msg": "timeout", "error": str(e), "retryable": True, } except Exception as e: print(f"[请求失败] {e}", file=sys.stderr) return { "code": -1, "success": False, "msg": "unknown_error", "error": str(e), "retryable": False, } return { "code": -1, "success": False, "msg": "request_failed", "error": "max retries exceeded", "retryable": True, } def _print(data: Any) -> None: """格式化打印 JSON 结果。""" print(json.dumps(data, ensure_ascii=False, indent=2)) # ────────────────────────────────────────────────────────────────────────────── # 论文类 API # ────────────────────────────────────────────────────────────────────────────── def paper_search(token: str, title: str, page: int = 0, size: int = 10) -> Any: """论文搜索(免费):根据标题搜索,返回 ID/标题/DOI。""" return _request(token, "GET", "/api/paper/search", params={"title": title, "page": page, "size": size}) def paper_search_pro(token: str, title: str = None, keyword: str = None, abstract: str = None, author: str = None, org: str = None, venue: str = None, order: str = None, page: int = 0, size: int = 10) -> Any: """论文搜索 pro(¥0.01/次):多条件搜索。""" params = {"page": page, "size": size} for k, v in [("title", title), ("keyword", keyword), ("abstract", abstract), ("author", author), ("org", org), ("venue", venue), ("order", order)]: if v is not None: params[k] = v return _request(token, "GET", "/api/paper/search/pro", params=params) def paper_qa_search(token: str, query: str = None, use_topic: bool = False, topic_high: str = None, topic_middle: str = None, topic_low: str = None, title: list = None, doi: str = None, year: list = None, sci_flag: bool = False, n_citation_flag: bool = False, force_citation_sort: bool = False, force_year_sort: bool = False, author_terms: list = None, org_terms: list = None, size: int = 10, offset: int = 0) -> Any: """论文问答搜索(¥0.05/次):AI 智能问答,支持自然语言和结构化关键词。""" body: dict = {"use_topic": use_topic, "size": size, "offset": offset} if query: body["query"] = query if topic_high: body["topic_high"] = topic_high if topic_middle: body["topic_middle"] = topic_middle if topic_low: body["topic_low"] = topic_low if title: body["title"] = title if doi: body["doi"] = doi if year: body["year"] = year if sci_flag: body["sci_flag"] = True if n_citation_flag: body["n_citation_flag"] = True if force_citation_sort: body["force_citation_sort"] = True if force_year_sort: body["force_year_sort"] = True if author_terms: body["author_terms"] = author_terms if org_terms: body["org_terms"] = org_terms return _request(token, "POST", "/api/paper/qa/search", body=body) def paper_info(token: str, ids: list) -> Any: """论文信息(免费):批量根据 ID 获取基础信息。""" return _request(token, "POST", "/api/paper/info", body={"ids": ids}) def paper_detail(token: str, paper_id: str) -> Any: """论文详情(¥0.01/次):获取完整论文信息。""" return _request(token, "GET", "/api/paper/detail", params={"id": paper_id}) def paper_relation(token: str, paper_id: str) -> Any: """论文引用(¥0.10/次):获取该论文引用的其他论文。""" return _request(token, "GET", "/api/paper/relation", params={"id": paper_id}) def paper_list_by_search_venue(token: str, keyword: str = None, venue: str = None, author: str = None, order: str = None, page: int = 0, size: int = 10) -> Any: """论文综合搜索(¥0.30/次):通过关键词/期刊/作者获取完整论文信息。""" params = {"page": page, "size": size} for k, v in [("keyword", keyword), ("venue", venue), ("author", author), ("order", order)]: if v is not None: params[k] = v return _request(token, "GET", "/api/paper/list/by/search/venue", params=params) def paper_list_by_keywords(token: str, keywords: list, page: int = 0, size: int = 10) -> Any: """论文批量查询(¥0.10/次):多关键词获取论文摘要等信息。""" params = {"page": page, "size": size, "keywords": json.dumps(keywords, ensure_ascii=False)} return _request(token, "GET", "/api/paper/list/citation/by/keywords", params=params) def paper_detail_by_condition(token: str, year: int, venue_id: str = None) -> Any: """按年份与期刊获取论文详情(¥0.20/次):year 与 venue_id 须同时传入,仅传 year 返回 null。""" params: dict = {"year": year} if venue_id: params["venue_id"] = venue_id return _request(token, "GET", "/api/paper/platform/allpubs/more/detail/by/ts/org/venue", params=params) # ────────────────────────────────────────────────────────────────────────────── # 学者类 API # ────────────────────────────────────────────────────────────────────────────── def person_search(token: str, name: str = None, org: str = None, org_id: list = None, offset: int = 0, size: int = 5) -> Any: """学者搜索(免费):根据姓名/机构搜索学者。""" body: dict = {"offset": offset, "size": size} if name: body["name"] = name if org: body["org"] = org if org_id: body["org_id"] = org_id return _request(token, "POST", "/api/person/search", body=body) def person_detail(token: str, person_id: str) -> Any: """学者详情(¥1.00/次):获取完整个人信息。""" return _request(token, "GET", "/api/person/detail", params={"id": person_id}) def person_figure(token: str, person_id: str) -> Any: """学者画像(¥0.50/次):获取研究兴趣、领域及结构化经历。""" return _request(token, "GET", "/api/person/figure", params={"id": person_id}) def person_paper_relation(token: str, person_id: str) -> Any: """学者论文(¥1.50/次):获取学者发表的论文列表。""" return _request(token, "GET", "/api/person/paper/relation", params={"id": person_id}) def person_patent_relation(token: str, person_id: str) -> Any: """学者专利(¥1.50/次):获取学者的专利列表。""" return _request(token, "GET", "/api/person/patent/relation", params={"id": person_id}) def person_project(token: str, person_id: str) -> Any: """学者项目(¥3.00/次):获取科研项目(资助金额/时间/来源)。""" return _request(token, "GET", "/api/project/person/v3/open", params={"id": person_id}) # ────────────────────────────────────────────────────────────────────────────── # 机构类 API # ────────────────────────────────────────────────────────────────────────────── def org_search(token: str, orgs: list) -> Any: """机构搜索(免费):根据名称关键词搜索机构。""" return _request(token, "POST", "/api/organization/search", body={"orgs": orgs}) def org_detail(token: str, ids: list) -> Any: """机构详情(¥0.01/次):根据机构 ID 获取详情。""" return _request(token, "POST", "/api/organization/detail", body={"ids": ids}) def org_person_relation(token: str, org_id: str, offset: int = 0) -> Any: """机构学者(¥0.50/次):获取机构下的学者列表(每次 10 条)。""" return _request(token, "GET", "/api/organization/person/relation", params={"org_id": org_id, "offset": offset}) def org_paper_relation(token: str, org_id: str, offset: int = 0) -> Any: """机构论文(¥0.10/次):获取机构学者发表的论文列表(每次 10 条)。""" return _request(token, "GET", "/api/organization/paper/relation", params={"org_id": org_id, "offset": offset}) def org_patent_relation(token: str, org_id: str, page: int = 1, page_size: int = 100) -> Any: """机构专利(¥0.10/次):获取机构拥有的专利列表,支持分页(page_size 最大 10000)。""" return _request(token, "GET", "/api/organization/patent/relation", params={"id": org_id, "page": page, "page_size": page_size}) def org_disambiguate(token: str, org: str) -> Any: """机构消歧(¥0.01/次):获取机构标准化名称。""" return _request(token, "POST", "/api/organization/na", body={"org": org}) def org_disambiguate_pro(token: str, org: str) -> Any: """机构消歧 pro(¥0.05/次):提取一级和二级机构 ID。""" return _request(token, "POST", "/api/organization/na/pro", body={"org": org}) # ────────────────────────────────────────────────────────────────────────────── # 期刊类 API # ────────────────────────────────────────────────────────────────────────────── def venue_search(token: str, name: str) -> Any: """期刊搜索(免费):根据名称搜索期刊 ID 和标准名称。""" return _request(token, "POST", "/api/venue/search", body={"name": name}) def venue_detail(token: str, venue_id: str) -> Any: """期刊详情(¥0.20/次):获取 ISSN、简称、类型等。""" return _request(token, "POST", "/api/venue/detail", body={"id": venue_id}) def venue_paper_relation(token: str, venue_id: str, offset: int = 0, limit: int = 20, year: Optional[int] = None) -> Any: """期刊论文(¥0.10/次):获取期刊论文列表(支持按年份筛选)。""" body: dict = {"id": venue_id, "offset": offset, "limit": limit} if year is not None: body["year"] = year return _request(token, "POST", "/api/venue/paper/relation", body=body) # ────────────────────────────────────────────────────────────────────────────── # 专利类 API # ────────────────────────────────────────────────────────────────────────────── def patent_search(token: str, query: str, page: int = 0, size: int = 10) -> Any: """专利搜索(免费):根据名称/关键词搜索专利。""" return _request(token, "POST", "/api/patent/search", body={"query": query, "page": page, "size": size}) def patent_info(token: str, patent_id: str) -> Any: """专利信息(免费):获取专利基础信息(标题/专利号/发明人)。""" return _request(token, "GET", "/api/patent/info", params={"id": patent_id}) def patent_detail(token: str, patent_id: str) -> Any: """专利详情(¥0.01/次):获取完整专利信息(摘要/申请日/IPC等)。""" return _request(token, "GET", "/api/patent/detail", params={"id": patent_id}) # ────────────────────────────────────────────────────────────────────────────── # 组合工作流 # ────────────────────────────────────────────────────────────────────────────── def workflow_scholar_profile(token: str, name: str) -> dict: """ 工作流 1:学者全景分析 搜索学者 → 详情 + 画像 + 论文 + 专利 + 项目 """ print(f"[1/6] 搜索学者:{name}", file=sys.stderr) search_result = person_search(token, name=name, size=5) if not search_result or not search_result.get("data"): return {"error": f"未找到学者:{name}"} candidates = search_result["data"] scholar = candidates[0] person_id = scholar.get("id") or scholar.get("_id") print(f" 找到:{scholar.get('name')} ({scholar.get('org')}),ID={person_id}", file=sys.stderr) result = { "source_api_chain": [ "person_search", "person_detail", "person_figure", "person_paper_relation", "person_patent_relation", "person_project", ], "search_candidates": candidates[:3], "selected": { "id": person_id, "name": scholar.get("name"), "name_zh": scholar.get("name_zh"), "org": scholar.get("org"), "interests": scholar.get("interests"), "n_citation": scholar.get("n_citation"), } } print("[2/6] 获取学者详情...", file=sys.stderr) detail = person_detail(token, person_id) if detail and detail.get("data"): result["detail"] = detail["data"] print("[3/6] 获取学者画像...", file=sys.stderr) figure = person_figure(token, person_id) if figure and figure.get("data"): result["figure"] = figure["data"] print("[4/6] 获取学者论文...", file=sys.stderr) papers = person_paper_relation(token, person_id) if papers and papers.get("data"): result["papers"] = papers["data"][:20] result["papers_total"] = papers.get("total", len(papers["data"])) print("[5/6] 获取学者专利...", file=sys.stderr) patents = person_patent_relation(token, person_id) if patents and patents.get("data"): result["patents"] = patents["data"][:10] print("[6/6] 获取学者项目...", file=sys.stderr) projects = person_project(token, person_id) if projects and projects.get("data"): result["projects"] = projects["data"][:10] return result def workflow_paper_deep_dive(token: str, title: str = None, keyword: str = None, author: str = None, order: str = "n_citation") -> dict: """ 工作流 2:论文深度挖掘 搜索论文 → 详情 + 引用链 + 引用论文基础信息 """ print(f"[1/4] 搜索论文:title={title}, keyword={keyword}", file=sys.stderr) if keyword or author: search_result = paper_search_pro(token, title=title, keyword=keyword, author=author, order=order, size=5) search_api = "paper_search_pro" else: search_result = paper_search(token, title=title or keyword, size=5) search_api = "paper_search" if not search_result or not search_result.get("data"): # 标题检索无结果时,降级到 pro 检索,提高召回率 print(" 标题检索无结果,降级到 paper_search_pro...", file=sys.stderr) search_result = paper_search_pro(token, title=title, keyword=title, author=author, order=order, size=5) search_api = "paper_search_pro(fallback)" if not search_result or not search_result.get("data"): return {"error": "未找到相关论文"} papers = search_result["data"] top_paper = papers[0] paper_id = top_paper.get("id") or top_paper.get("_id") print(f" 找到:{top_paper.get('title')[:60]},ID={paper_id}", file=sys.stderr) result = { "source_api_chain": [ search_api, "paper_detail", "paper_relation", "paper_info", ], "search_candidates": papers[:5], "selected_id": paper_id, "selected_title": top_paper.get("title"), } print("[2/4] 获取论文详情...", file=sys.stderr) detail = paper_detail(token, paper_id) if detail and detail.get("data"): result["detail"] = detail["data"] print("[3/4] 获取引用关系...", file=sys.stderr) relation = paper_relation(token, paper_id) if relation and relation.get("data"): # data 结构:[{"_id": "", "cited": [{...}, ...]}] # 外层数组是以论文为单位的包装,真正的引用列表在 cited 字段里 all_cited = [] for item in relation["data"]: all_cited.extend(item.get("cited") or []) result["citations_count"] = len(all_cited) result["citations_preview"] = all_cited[:10] # 批量获取被引论文基础信息 cited_ids = [c.get("_id") or c.get("id") for c in all_cited[:20] if c.get("_id") or c.get("id")] if cited_ids: print(f"[4/4] 批量获取 {len(cited_ids)} 篇被引论文信息...", file=sys.stderr) info = paper_info(token, cited_ids) if info and info.get("data"): result["cited_papers_info"] = info["data"] else: print("[4/4] 跳过(无被引 ID)", file=sys.stderr) else: print("[4/4] 跳过(无引用数据)", file=sys.stderr) return result def workflow_org_analysis(token: str, org: str) -> dict: """ 工作流 3:机构研究力分析 机构消歧 pro → 详情 + 学者 + 论文 + 专利 """ print(f"[1/5] 机构消歧:{org}", file=sys.stderr) disamb = org_disambiguate_pro(token, org) org_id = None if disamb and disamb.get("data"): data = disamb["data"] if isinstance(data, list) and data: first = data[0] org_id = first.get("一级ID") or first.get("二级ID") elif isinstance(data, dict): org_id = data.get("一级ID") or data.get("二级ID") if not org_id: print(" 消歧 pro 未返回 ID,尝试机构搜索...", file=sys.stderr) search_r = org_search(token, [org]) if search_r and search_r.get("data"): orgs = search_r["data"] org_id = orgs[0].get("org_id") if orgs else None if not org_id: return {"error": f"无法找到机构 ID:{org}"} print(f" 机构 ID:{org_id}", file=sys.stderr) result = { "source_api_chain": [ "org_disambiguate_pro", "org_detail", "org_person_relation", "org_paper_relation", "org_patent_relation", ], "org_query": org, "org_id": org_id, "disambiguate": disamb, } print("[2/5] 获取机构详情...", file=sys.stderr) detail = org_detail(token, [org_id]) if detail and detail.get("data"): result["detail"] = detail["data"] print("[3/5] 获取机构学者(前10位)...", file=sys.stderr) scholars = org_person_relation(token, org_id, offset=0) if scholars and scholars.get("data"): result["scholars"] = scholars["data"] result["scholars_total"] = scholars.get("total", len(scholars["data"])) print("[4/5] 获取机构论文(前10篇)...", file=sys.stderr) papers = org_paper_relation(token, org_id, offset=0) if papers and papers.get("data"): result["papers"] = papers["data"] result["papers_total"] = papers.get("total", len(papers["data"])) print("[5/5] 获取机构专利(最多100条)...", file=sys.stderr) patents = org_patent_relation(token, org_id, page=1, page_size=100) if patents and patents.get("data"): result["patents"] = patents["data"] result["patents_total"] = patents.get("total", len(patents["data"])) return result def workflow_venue_papers(token: str, venue: str, year: Optional[int] = None, limit: int = 20) -> dict: """ 工作流 4:期刊论文监控 期刊搜索 → 期刊详情 + 按年份获取论文列表 """ print(f"[1/3] 搜索期刊:{venue}", file=sys.stderr) search_result = venue_search(token, venue) if not search_result or not search_result.get("data"): return {"error": f"未找到期刊:{venue}"} venues = search_result["data"] top_venue = venues[0] venue_id = top_venue.get("id") print(f" 找到:{top_venue.get('name_en')},ID={venue_id}", file=sys.stderr) result = { "source_api_chain": [ "venue_search", "venue_detail", "venue_paper_relation", ], "search_candidates": venues[:3], "venue_id": venue_id, } print("[2/3] 获取期刊详情...", file=sys.stderr) detail = venue_detail(token, venue_id) if detail and detail.get("data"): result["venue_detail"] = detail["data"] print(f"[3/3] 获取期刊论文(year={year}, limit={limit})...", file=sys.stderr) papers = venue_paper_relation(token, venue_id, year=year, limit=limit) if papers and papers.get("data"): result["papers"] = papers["data"] result["papers_total"] = papers.get("total", len(papers["data"])) return result def workflow_paper_qa(token: str, query: str = None, topic_high: str = None, topic_middle: str = None, sci_flag: bool = False, sort_citation: bool = False, size: int = 10) -> dict: """ 工作流 5:学术智能问答 使用 AI 驱动的论文问答搜索接口 """ use_topic = topic_high is not None print(f"[1/1] 学术问答搜索:query={query}, use_topic={use_topic}", file=sys.stderr) qa_result = paper_qa_search( token, query=query, use_topic=use_topic, topic_high=topic_high, topic_middle=topic_middle, sci_flag=sci_flag, force_citation_sort=sort_citation, size=size ) if qa_result and qa_result.get("code") == 200 and qa_result.get("data"): qa_result["source_api_chain"] = ["paper_qa_search"] qa_result["route"] = "paper_qa_search" return qa_result # query 模式无结果时,回退到 pro 检索 if query: print(" paper_qa_search 无结果,降级到 paper_search_pro...", file=sys.stderr) fallback = paper_search_pro(token, keyword=query, order="n_citation", size=size) data = (fallback or {}).get("data") or [] return { "code": 200 if data else (qa_result or {}).get("code", -1), "success": bool(data), "msg": "" if data else "no data", "data": data, "total": (fallback or {}).get("total", len(data)), "route": "paper_qa_search -> paper_search_pro", "source_api_chain": ["paper_qa_search", "paper_search_pro"], "primary_result": qa_result, } if isinstance(qa_result, dict): qa_result["source_api_chain"] = ["paper_qa_search"] qa_result["route"] = "paper_qa_search" return qa_result def workflow_patent_search(token: str, query: str, page: int = 0, size: int = 10) -> dict: """ 工作流 6:专利搜索与详情 专利搜索 → 获取每条专利的详情 """ print(f"[1/2] 搜索专利:{query}", file=sys.stderr) search_result = patent_search(token, query, page=page, size=size) if not search_result or not search_result.get("data"): return {"error": f"未找到专利:{query}"} patents = search_result["data"] result = { "source_api_chain": ["patent_search", "patent_detail"], "search_results": patents, "total": len(patents), } print(f"[2/2] 获取前 {min(3, len(patents))} 条专利详情...", file=sys.stderr) details = [] for p in patents[:3]: pid = p.get("id") if pid: d = patent_detail(token, pid) if d and d.get("data"): details.append(d["data"]) result["details"] = details return result def workflow_scholar_patents(token: str, name: str) -> dict: """ 通过学者名获取其专利列表 + 每条专利详情 """ print(f"[1/3] 搜索学者:{name}", file=sys.stderr) search_result = person_search(token, name=name, size=3) if not search_result or not search_result.get("data"): return {"error": f"未找到学者:{name}"} scholar = search_result["data"][0] person_id = scholar.get("id") print(f" 找到:{scholar.get('name')},ID={person_id}", file=sys.stderr) result = {"scholar": scholar} print("[2/3] 获取学者专利列表...", file=sys.stderr) patents = person_patent_relation(token, person_id) if not patents or not patents.get("data"): return {**result, "patents": [], "error": "该学者无专利数据"} patent_list = patents["data"] result["patents_list"] = patent_list print(f"[3/3] 获取前 {min(3, len(patent_list))} 条专利详情...", file=sys.stderr) details = [] for p in patent_list[:3]: pid = p.get("patent_id") if pid: d = patent_detail(token, pid) if d and d.get("data"): details.append(d["data"]) result["patent_details"] = details return result # ────────────────────────────────────────────────────────────────────────────── # 命令行入口 # ────────────────────────────────────────────────────────────────────────────── def build_parser() -> argparse.ArgumentParser: p = argparse.ArgumentParser( description="AMiner 开放平台学术数据查询客户端", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" 示例: # 学者全景分析 python aminer_client.py --token --action scholar_profile --name "Andrew Ng" # 论文深度挖掘 python aminer_client.py --token --action paper_deep_dive --title "BERT" python aminer_client.py --token --action paper_deep_dive --keyword "large language model" --author "Hinton" # 机构研究力分析 python aminer_client.py --token --action org_analysis --org "Tsinghua University" # 期刊论文监控 python aminer_client.py --token --action venue_papers --venue "NeurIPS" --year 2023 # 学术智能问答 python aminer_client.py --token --action paper_qa --query "蛋白质结构深度学习" python aminer_client.py --token --action paper_qa \\ --topic_high '[["transformer","self-attention"],["protein folding"]]' \\ --sci_flag --sort_citation # 专利搜索 python aminer_client.py --token --action patent_search --query "量子计算芯片" # 学者专利 python aminer_client.py --token --action scholar_patents --name "张首晟" # 直接调用单个 API python aminer_client.py --token --action raw \\ --api paper_search --params '{"title":"BERT","page":0,"size":5}' 控制台(生成Token):https://open.aminer.cn/open/board?tab=control 文档:https://open.aminer.cn/open/doc """ ) p.add_argument("--token", default=TEST_TOKEN, help="AMiner API Token(前往 https://open.aminer.cn/open/board?tab=control 生成)") p.add_argument("--action", required=True, choices=["scholar_profile", "paper_deep_dive", "org_analysis", "venue_papers", "paper_qa", "patent_search", "scholar_patents", "raw"], help="执行的操作") # 通用参数 p.add_argument("--name", help="学者姓名") p.add_argument("--title", help="论文标题") p.add_argument("--keyword", help="关键词") p.add_argument("--author", help="作者名") p.add_argument("--org", help="机构名称") p.add_argument("--venue", help="期刊名称") p.add_argument("--query", help="查询字符串(自然语言问答或专利搜索)") p.add_argument("--year", type=int, help="年份筛选") p.add_argument("--size", type=int, default=10, help="返回条数") p.add_argument("--page", type=int, default=0, help="页码") p.add_argument("--page_size", type=int, default=100, help="机构专利分页条数(最大 10000)") p.add_argument("--order", default="n_citation", choices=["n_citation", "year"], help="排序方式") # 论文问答专用 p.add_argument("--topic_high", help="必须匹配的关键词数组(JSON字符串,外层AND内层OR)") p.add_argument("--topic_middle", help="大幅加分关键词(格式同 topic_high)") p.add_argument("--sci_flag", action="store_true", help="只返回 SCI 论文") p.add_argument("--sort_citation", action="store_true", help="按引用量排序") # raw 模式 p.add_argument("--api", help="[raw模式] API 函数名,如 paper_search") p.add_argument("--params", help="[raw模式] JSON 格式的参数字典") return p def main(): parser = build_parser() args = parser.parse_args() token = args.token if args.action == "scholar_profile": if not args.name: parser.error("--action scholar_profile 需要 --name 参数") result = workflow_scholar_profile(token, args.name) elif args.action == "paper_deep_dive": if not args.title and not args.keyword: parser.error("--action paper_deep_dive 需要 --title 或 --keyword 参数") result = workflow_paper_deep_dive( token, title=args.title, keyword=args.keyword, author=args.author, order=args.order ) elif args.action == "org_analysis": if not args.org: parser.error("--action org_analysis 需要 --org 参数") result = workflow_org_analysis(token, args.org) elif args.action == "venue_papers": if not args.venue: parser.error("--action venue_papers 需要 --venue 参数") result = workflow_venue_papers(token, args.venue, year=args.year, limit=args.size) elif args.action == "paper_qa": if not args.query and not args.topic_high: parser.error("--action paper_qa 需要 --query 或 --topic_high 参数") result = workflow_paper_qa( token, query=args.query, topic_high=args.topic_high, topic_middle=args.topic_middle, sci_flag=args.sci_flag, sort_citation=args.sort_citation, size=args.size ) elif args.action == "patent_search": if not args.query: parser.error("--action patent_search 需要 --query 参数") result = workflow_patent_search(token, args.query, page=args.page, size=args.size) elif args.action == "scholar_patents": if not args.name: parser.error("--action scholar_patents 需要 --name 参数") result = workflow_scholar_patents(token, args.name) elif args.action == "raw": if not args.api: parser.error("--action raw 需要 --api 参数(API 函数名)") fn = globals().get(args.api) if fn is None or not callable(fn): parser.error(f"未找到 API 函数:{args.api}。可用函数请查看源码。") kwargs = json.loads(args.params) if args.params else {} result = fn(token, **kwargs) else: parser.print_help() sys.exit(1) _print(result) if __name__ == "__main__": main()