pgstrom如何工作
发布日期:2021-06-29 18:40:21 浏览次数:2 分类:技术文章

本文共 16458 字,大约阅读时间需要 54 分钟。

文章目录

pgstrom啥时候被启动

  • 当开启服务器的时候
  • src\backend\postmaster\postmaster.c:PostmasterMain
/*	 * process any libraries that should be preloaded at postmaster start	 */	process_shared_preload_libraries();
  • 在这儿的时候,pgstrom就被启动了
    • 然后就执行了void _PG_init(void)
/* * _PG_init * * Main entrypoint of PG-Strom. It shall be invoked only once when postmaster * process is starting up, then it calls other sub-systems to initialize for each ones. */void_PG_init(void){
/* * PG-Strom has to be loaded using shared_preload_libraries option */ if (!process_shared_preload_libraries_in_progress) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("PG-Strom must be loaded via shared_preload_libraries"))); /* link nvrtc library according to the current CUDA version */ pgstrom_init_nvrtc(); /* dump version number */#ifdef PGSTROM_VERSION elog(LOG, "PG-Strom version %s built for PostgreSQL %s", PGSTROM_VERSION, PG_MAJORVERSION);#else elog(LOG, "PG-Strom built for PostgreSQL %s", PG_MAJORVERSION);#endif /* init misc variables */ PAGE_SIZE = sysconf(_SC_PAGESIZE); PAGE_MASK = PAGE_SIZE - 1; PAGE_SHIFT = get_next_log2(PAGE_SIZE); PHYS_PAGES = sysconf(_SC_PHYS_PAGES); /* init GPU/CUDA infrastracture */ pgstrom_init_common_guc(); pgstrom_init_shmbuf(); pgstrom_init_gpu_device(); pgstrom_init_gpu_mmgr(); pgstrom_init_gpu_context(); pgstrom_init_cuda_program(); pgstrom_init_nvme_strom(); pgstrom_init_codegen(); /* init custom-scan providers/FDWs */ pgstrom_init_gputasks(); pgstrom_init_gpuscan(); pgstrom_init_gpujoin(); pgstrom_init_inners(); pgstrom_init_gpupreagg(); pgstrom_init_relscan(); pgstrom_init_arrow_fdw(); /* check commercial license, if any */ check_heterodb_license(); /* dummy custom-scan node */ memset(&pgstrom_dummy_path_methods, 0, sizeof(CustomPathMethods)); pgstrom_dummy_path_methods.CustomName = "Dummy"; pgstrom_dummy_path_methods.PlanCustomPath = pgstrom_dummy_create_plan; memset(&pgstrom_dummy_plan_methods, 0, sizeof(CustomScanMethods)); pgstrom_dummy_plan_methods.CustomName = "Dummy"; pgstrom_dummy_plan_methods.CreateCustomScanState = pgstrom_dummy_create_scan_state; /* planner hook registration */ planner_hook_next = planner_hook; planner_hook = pgstrom_post_planner;}

举例:如何添加GPUscan结点到生成计划里面?

/*
 * set_base_rel_pathlists
 *	  Finds all paths available for scanning each base-relation entry.
 *	  Sequential scan and any available indices are considered.
 *	  Each useful path is attached to its relation's 'pathlist' field.
 *
 * Walks root->simple_rel_array starting at index 1 and calls
 * set_rel_pathlist() for every entry whose reloptkind is RELOPT_BASEREL.
 */
static void
set_base_rel_pathlists(PlannerInfo *root)
{
	Index		rti;

	for (rti = 1; rti < root->simple_rel_array_size; rti++)
	{
		RelOptInfo *rel = root->simple_rel_array[rti];

		/* there may be empty slots corresponding to non-baserel RTEs */
		if (rel == NULL)
			continue;

		Assert(rel->relid == rti);	/* sanity check on array */

		/* ignore RTEs that are "other rels" */
		if (rel->reloptkind != RELOPT_BASEREL)
			continue;

		set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
	}
}

set_rel_pathlist

/*
 * set_rel_pathlist
 *	  Build access paths for a base relation
 *
 * After the core code has generated its own paths, set_rel_pathlist_hook
 * (if set) is invoked so that a plugin can add, remove or modify paths;
 * only afterwards are gather paths considered and the cheapest path chosen.
 */
static void
set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
				 Index rti, RangeTblEntry *rte)
{
	if (IS_DUMMY_REL(rel))
	{
		/* We already proved the relation empty, so nothing more to do */
	}
	else if (rte->inh)
	{
		/* It's an "append relation", process accordingly */
		set_append_rel_pathlist(root, rel, rti, rte);
	}
	else
	{
		switch (rel->rtekind)
		{
			case RTE_RELATION:
				if (rte->relkind == RELKIND_FOREIGN_TABLE)
				{
					/* Foreign table */
					set_foreign_pathlist(root, rel, rte);
				}
				else if (rte->tablesample != NULL)
				{
					/* Sampled relation */
					set_tablesample_rel_pathlist(root, rel, rte);
				}
				else
				{
					/* Plain relation */
					set_plain_rel_pathlist(root, rel, rte);
				}
				break;
			case RTE_SUBQUERY:
				/* Subquery --- fully handled during set_rel_size */
				break;
			case RTE_FUNCTION:
				/* RangeFunction */
				set_function_pathlist(root, rel, rte);
				break;
			case RTE_TABLEFUNC:
				/* Table Function */
				set_tablefunc_pathlist(root, rel, rte);
				break;
			case RTE_VALUES:
				/* Values list */
				set_values_pathlist(root, rel, rte);
				break;
			case RTE_CTE:
				/* CTE reference --- fully handled during set_rel_size */
				break;
			case RTE_NAMEDTUPLESTORE:
				/* tuplestore reference --- fully handled during set_rel_size */
				break;
			default:
				elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
				break;
		}
	}

	/*
	 * Allow a plugin to editorialize on the set of Paths for this base
	 * relation.  It could add new paths (such as CustomPaths) by calling
	 * add_path(), or add_partial_path() if parallel aware.  It could also
	 * delete or modify paths added by the core code.
	 */
	if (set_rel_pathlist_hook)
		(*set_rel_pathlist_hook) (root, rel, rti, rte);

	/*
	 * If this is a baserel, we should normally consider gathering any partial
	 * paths we may have created for it.  We have to do this after calling the
	 * set_rel_pathlist_hook, else it cannot add partial paths to be included
	 * here.
	 *
	 * However, if this is an inheritance child, skip it.  Otherwise, we could
	 * end up with a very large number of gather nodes, each trying to grab
	 * its own pool of workers.  Instead, we'll consider gathering partial
	 * paths for the parent appendrel.
	 *
	 * Also, if this is the topmost scan/join rel (that is, the only baserel),
	 * we postpone gathering until the final scan/join targetlist is available
	 * (see grouping_planner).
	 */
	if (rel->reloptkind == RELOPT_BASEREL &&
		bms_membership(root->all_baserels) != BMS_SINGLETON)
		generate_gather_paths(root, rel, false);

	/* Now find the cheapest of the paths for this rel */
	set_cheapest(rel);

#ifdef OPTIMIZER_DEBUG
	debug_print_rel(root, rel);
#endif
}
  • 以CustomPath的形式加入GPUscanpath

pgstrom_init_gpuscan();

/* * pgstrom_init_gpuscan */voidpgstrom_init_gpuscan(void){
/* pg_strom.enable_gpuscan */ DefineCustomBoolVariable("pg_strom.enable_gpuscan", "Enables the use of GPU accelerated full-scan", NULL, &enable_gpuscan, true, PGC_USERSET, GUC_NOT_IN_SAMPLE, NULL, NULL, NULL); /* pg_strom.pullup_outer_scan */ DefineCustomBoolVariable("pg_strom.pullup_outer_scan", "Enables to pull up simple outer scan", NULL, &enable_pullup_outer_scan, true, PGC_USERSET, GUC_NOT_IN_SAMPLE, NULL, NULL, NULL); /* setup path methods */ memset(&gpuscan_path_methods, 0, sizeof(gpuscan_path_methods)); gpuscan_path_methods.CustomName = "GpuScan"; gpuscan_path_methods.PlanCustomPath = PlanGpuScanPath; /* setup plan methods */ memset(&gpuscan_plan_methods, 0, sizeof(gpuscan_plan_methods)); gpuscan_plan_methods.CustomName = "GpuScan"; gpuscan_plan_methods.CreateCustomScanState = gpuscan_create_scan_state; RegisterCustomScanMethods(&gpuscan_plan_methods); /* setup exec methods */ memset(&gpuscan_exec_methods, 0, sizeof(gpuscan_exec_methods)); gpuscan_exec_methods.CustomName = "GpuScan"; gpuscan_exec_methods.BeginCustomScan = ExecInitGpuScan; gpuscan_exec_methods.ExecCustomScan = ExecGpuScan; gpuscan_exec_methods.EndCustomScan = ExecEndGpuScan; gpuscan_exec_methods.ReScanCustomScan = ExecReScanGpuScan; gpuscan_exec_methods.EstimateDSMCustomScan = ExecGpuScanEstimateDSM; gpuscan_exec_methods.InitializeDSMCustomScan = ExecGpuScanInitDSM; gpuscan_exec_methods.InitializeWorkerCustomScan = ExecGpuScanInitWorker; gpuscan_exec_methods.ReInitializeDSMCustomScan = ExecGpuScanReInitializeDSM; gpuscan_exec_methods.ShutdownCustomScan = ExecShutdownGpuScan; gpuscan_exec_methods.ExplainCustomScan = ExplainGpuScan; /* hook registration */ set_rel_pathlist_next = set_rel_pathlist_hook; set_rel_pathlist_hook = gpuscan_add_scan_path;}

gpuscan_add_scan_path;

/*
 * gpuscan_add_scan_path - entrypoint of the set_rel_pathlist_hook
 *
 * Calls the previously installed hook first, then - provided PG-Strom and
 * GpuScan are enabled, the relation is a plain table / materialized view /
 * arrow_fdw foreign table, and at least one restriction clause can be
 * evaluated on the device - adds a GpuScan CustomPath to 'baserel'.  When
 * parallel query is applicable, a partial GpuScan path is added as well.
 */
static void
gpuscan_add_scan_path(PlannerInfo *root,
					  RelOptInfo *baserel,
					  Index rtindex,
					  RangeTblEntry *rte)
{
	Path	   *pathnode;
	List	   *dev_quals = NIL;
	List	   *host_quals = NIL;
	IndexOptInfo *indexOpt;
	/*
	 * Initialized so that nothing indeterminate is handed to
	 * create_gpuscan_path() should pgstrom_tryfind_brinindex() leave these
	 * output arguments untouched when no BRIN index is usable.
	 */
	List	   *indexConds = NIL;
	List	   *indexQuals = NIL;
	cl_long		indexNBlocks = 0;
	ListCell   *lc;

	/* call the secondary hook */
	if (set_rel_pathlist_next)
		set_rel_pathlist_next(root, baserel, rtindex, rte);

	/* nothing to do, if either PG-Strom or GpuScan is not enabled */
	if (!pgstrom_enabled || !enable_gpuscan)
		return;

	/* We already proved the relation empty, so nothing more to do */
	if (is_dummy_rel(baserel))
		return;

	/* It is the role of built-in Append node */
	if (rte->inh)
		return;

	/*
	 * GpuScan can run on only base relations or foreign table managed
	 * by arrow_fdw.
	 */
	if (rte->relkind == RELKIND_FOREIGN_TABLE)
	{
		if (!baseRelIsArrowFdw(baserel))
			return;
	}
	else if (rte->relkind != RELKIND_RELATION &&
			 rte->relkind != RELKIND_MATVIEW)
		return;

	/* Check whether the qualifier can run on GPU device */
	foreach (lc, baserel->baserestrictinfo)
	{
		RestrictInfo *rinfo = lfirst(lc);

		if (pgstrom_device_expression(root, baserel, rinfo->clause))
			dev_quals = lappend(dev_quals, rinfo);
		else
			host_quals = lappend(host_quals, rinfo);
	}
	/* without any device-executable qualifier, no GpuScan path is added */
	if (dev_quals == NIL)
		return;

	/* Check availability of GpuScan+BRIN Index */
	indexOpt = pgstrom_tryfind_brinindex(root, baserel,
										 &indexConds,
										 &indexQuals,
										 &indexNBlocks);

	/* add GpuScan path in single process */
	pathnode = create_gpuscan_path(root, baserel,
								   dev_quals,
								   host_quals,
								   0,
								   indexOpt,
								   indexConds,
								   indexQuals,
								   indexNBlocks);
	add_path(baserel, pathnode);

	/* If appropriate, consider parallel GpuScan */
	if (baserel->consider_parallel && baserel->lateral_relids == NULL)
	{
		int		parallel_nworkers
			= compute_parallel_worker(baserel,
									  baserel->pages, -1.0,
									  max_parallel_workers_per_gather);

		/*
		 * XXX - Do we need some GpuScan-specific logic to adjust
		 * parallel_workers?
		 */
		if (parallel_nworkers <= 0)
			return;

		/* add GpuScan path performing on parallel workers */
		pathnode = create_gpuscan_path(root, baserel,
									   dev_quals,
									   host_quals,
									   parallel_nworkers,
									   indexOpt,
									   indexConds,
									   indexQuals,
									   indexNBlocks);
		add_partial_path(baserel, pathnode);
	}
}

pg提供了一些钩子(hook)变量,例如planner_hook和set_rel_pathlist_hook,扩展可以通过给它们赋值,把自己的函数挂接到优化器的处理流程中。

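  • 注册planner_hook之后,优化器的入口函数就不再直接是standard_planner,而是pgstrom_post_planner啦!(它内部仍会先调用planner_hook_next或standard_planner,再对生成的计划做后处理)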
/*
 * pgstrom_post_planner
 *
 * Planner hook of PG-Strom.  Builds the plan with the previously installed
 * planner hook when there is one, or with standard_planner() otherwise, and
 * then post-processes the main plan tree and every sub-plan through
 * pgstrom_post_planner_recurse().
 */
static PlannedStmt *
pgstrom_post_planner(Query *parse,
					 int cursorOptions,
					 ParamListInfo boundParams)
{
	PlannedStmt *stmt;
	ListCell   *cell;

	/* delegate the actual planning to the next hook or the core planner */
	stmt = (planner_hook_next
			? planner_hook_next(parse, cursorOptions, boundParams)
			: standard_planner(parse, cursorOptions, boundParams));

	/* main plan tree first, then each sub-plan in list order */
	pgstrom_post_planner_recurse(stmt, &stmt->planTree);
	foreach (cell, stmt->subplans)
	{
		pgstrom_post_planner_recurse(stmt, (Plan **)&lfirst(cell));
	}

	return stmt;
}
/*
 * pgstrom_post_planner_recurse
 *
 * Walks the plan tree rooted at *p_plan and removes 'dummy' custom scan
 * nodes: a CustomScan whose methods are pgstrom_dummy_plan_methods is
 * replaced in place by the result of pgstrom_dummy_remove_plan(), and the
 * replacement is then visited again.  GpuPreAgg custom scans are handed to
 * gpupreagg_post_planner().
 *
 * p_plan is the address of the pointer that refers to the node, so that the
 * node can be substituted within its parent.
 */
static void
pgstrom_post_planner_recurse(PlannedStmt *pstmt, Plan **p_plan)
{
	Plan	   *plan = *p_plan;
	ListCell   *lc;

	Assert(plan != NULL);

	switch (nodeTag(plan))
	{
		case T_ModifyTable:
			{
				ModifyTable *splan = (ModifyTable *) plan;

				foreach (lc, splan->plans)
					pgstrom_post_planner_recurse(pstmt, (Plan **)&lfirst(lc));
			}
			break;

		case T_Append:
			{
				Append	   *splan = (Append *) plan;

				foreach (lc, splan->appendplans)
					pgstrom_post_planner_recurse(pstmt, (Plan **)&lfirst(lc));
			}
			break;

		case T_MergeAppend:
			{
				MergeAppend *splan = (MergeAppend *) plan;

				foreach (lc, splan->mergeplans)
					pgstrom_post_planner_recurse(pstmt, (Plan **)&lfirst(lc));
			}
			break;

		case T_BitmapAnd:
			{
				BitmapAnd  *splan = (BitmapAnd *) plan;

				foreach (lc, splan->bitmapplans)
					pgstrom_post_planner_recurse(pstmt, (Plan **)&lfirst(lc));
			}
			break;

		case T_BitmapOr:
			{
				BitmapOr   *splan = (BitmapOr *) plan;

				foreach (lc, splan->bitmapplans)
					pgstrom_post_planner_recurse(pstmt, (Plan **)&lfirst(lc));
			}
			break;

		case T_SubqueryScan:
			{
				SubqueryScan *sscan = (SubqueryScan *) plan;

				pgstrom_post_planner_recurse(pstmt, &sscan->subplan);
			}
			break;

		case T_CustomScan:
			{
				CustomScan *cscan = (CustomScan *) plan;

				if (cscan->methods == &pgstrom_dummy_plan_methods)
				{
					/*
					 * Substitute the dummy node and restart on whatever
					 * took its place; 'plan' no longer refers to the
					 * current node, so do not fall through to the
					 * lefttree/righttree handling below.
					 */
					*p_plan = pgstrom_dummy_remove_plan(pstmt, cscan);
					pgstrom_post_planner_recurse(pstmt, p_plan);
					return;
				}
				else if (pgstrom_plan_is_gpupreagg(&cscan->scan.plan))
					gpupreagg_post_planner(pstmt, cscan);

				foreach (lc, cscan->custom_plans)
					pgstrom_post_planner_recurse(pstmt, (Plan **)&lfirst(lc));
			}
			break;

		default:
			break;
	}

	/* every node kind may additionally carry left/right children */
	if (plan->lefttree)
		pgstrom_post_planner_recurse(pstmt, &plan->lefttree);
	if (plan->righttree)
		pgstrom_post_planner_recurse(pstmt, &plan->righttree);
}

重要链接

  • https://blog.csdn.net/weixin_40730091/article/details/102734600?depth_1-utm_source=distribute.pc_relevant.none-task&utm_source=distribute.pc_relevant.none-task

重要连接的例子

SELECT cat, count(*), avg(ax) FROM t0 NATURAL JOIN t1 NATURAL JOIN t2 NATURAL JOIN t3 NATURAL JOIN t4 NATURAL JOIN t5 NATURAL JOIN t6 NATURAL JOIN t7 NATURAL JOIN t8 GROUP BY cat;

HashAggregate  (cost=3529132.25..3529132.57 rows=26 width=12)  Output: t0.cat, pgstrom.count((pgstrom.nrows())), pgstrom.avg((pgstrom.nrows((t1.ax IS NOT NULL))), (pgstrom.psum(t1.ax)))  Group Key: t0.cat  ->  Custom Scan (GpuPreAgg)  (cost=29099.24..2859511.64 rows=234 width=48)        Output: t0.cat, pgstrom.nrows(), pgstrom.nrows((t1.ax IS NOT NULL)), pgstrom.psum(t1.ax)        Reduction: Local + Global        GPU Projection: t0.cat, t1.ax        Logic Parameter: SafetyLimit: 2147483647, KeyDistSalt: 9        Extra: outer-bulk-exec        ->  Custom Scan (GpuJoin) on public.t0  (cost=25099.24..2826221.34 rows=93721454 width=12)              Output: t0.cat, t1.ax              GPU Projection: t0.cat::text, t1.ax::double precision              Depth 1: GpuHashJoin, HashKeys: (t0.fid)                       JoinQuals: (t0.fid = t6.fid)                       Nrows (in/out: 97.75%), KDS-Hash (size: 13.47MB, nbatches: 1)              Depth 2: GpuHashJoin, HashKeys: (t0.cid)                       JoinQuals: (t0.cid = t3.cid)                       Nrows (in/out: 98.34%), KDS-Hash (size: 13.47MB, nbatches: 1)              Depth 3: GpuHashJoin, HashKeys: (t0.bid)                       JoinQuals: (t0.bid = t2.bid)                       Nrows (in/out: 98.76%), KDS-Hash (size: 13.47MB, nbatches: 1)              Depth 4: GpuHashJoin, HashKeys: (t0.eid)                       JoinQuals: (t0.eid = t5.eid)                       Nrows (in/out: 98.97%), KDS-Hash (size: 13.47MB, nbatches: 1)              Depth 5: GpuHashJoin, HashKeys: (t0.aid)                       JoinQuals: (t0.aid = t1.aid)                       Nrows (in/out: 100.00%), KDS-Hash (size: 13.47MB, nbatches: 1)              Depth 6: GpuHashJoin, HashKeys: (t0.did)                       JoinQuals: (t0.did = t4.did)                       Nrows (in/out: 100.00%), KDS-Hash (size: 13.47MB, nbatches: 1)              Depth 7: GpuHashJoin, HashKeys: (t0.gid)                       JoinQuals: (t0.gid = t7.gid)                   
    Nrows (in/out: 100.00%), KDS-Hash (size: 13.47MB, nbatches: 1)              Depth 8: GpuHashJoin, HashKeys: (t0.hid)                       JoinQuals: (t0.hid = t8.hid)                       Nrows (in/out: 99.74%), KDS-Hash (size: 13.47MB, nbatches: 1)              Extra: bulk-exec-support, row-format              ->  Seq Scan on public.t6  (cost=0.00..1935.00 rows=100000 width=4)                    Output: t6.fid, t6.ftext, t6.fx              ->  Seq Scan on public.t3  (cost=0.00..1935.00 rows=100000 width=4)                    Output: t3.cid, t3.ctext, t3.cx              ->  Seq Scan on public.t2  (cost=0.00..1935.00 rows=100000 width=4)                    Output: t2.bid, t2.btext, t2.bx              ->  Seq Scan on public.t5  (cost=0.00..1935.00 rows=100000 width=4)                    Output: t5.eid, t5.etext, t5.ex              ->  Seq Scan on public.t1  (cost=0.00..1935.00 rows=100000 width=12)                    Output: t1.aid, t1.atext, t1.ax              ->  Seq Scan on public.t4  (cost=0.00..1935.00 rows=100000 width=4)                    Output: t4.did, t4.dtext, t4.dx              ->  Seq Scan on public.t7  (cost=0.00..1935.00 rows=100000 width=4)                    Output: t7.gid, t7.gtext, t7.gx              ->  Seq Scan on public.t8  (cost=0.00..1935.00 rows=100000 width=4)                    Output: t8.hid, t8.htext, t8.hx

转载地址:https://cyj666.blog.csdn.net/article/details/104618118 如侵犯您的版权,请留言回复原文章的地址,我们会给您删除此文章,给您带来不便请您谅解!

上一篇:standard_planner
下一篇:pgsql扩展全在这里

发表评论

最新留言

做的很好,不错不错
[***.243.131.199]2024年04月19日 18时49分08秒

关于作者

    喝酒易醉,品茶养心,人生如梦,品茶悟道,何以解忧?唯有杜康!
-- 愿君每日到此一游!

推荐文章