# Single-statement MATCH version of the lookup around '企业X'.
# Pattern: from the start vertex, follow e_shareholder_invest edges backwards for 1-2 hops
# to a holder vertex, then forwards for 0-2 hops to a t_enterprise vertex. Every edge on
# both legs must have con_prop >= 50.
# At most 100000 matched paths are kept; their vertices are then flattened and only those
# with exposure_line >= 0 and last_appl_state == 'PS' are returned, de-duplicated.
MATCH route = (target)<-[up_edges:e_shareholder_invest*1..2]-(holder)-[down_edges:e_shareholder_invest*0..2]->(invested:t_enterprise)
WHERE id(target) == '企业X'
  AND ALL(up_edge IN up_edges WHERE up_edge.con_prop >= 50)
  AND ALL(down_edge IN down_edges WHERE down_edge.con_prop >= 50)
WITH nodes(route) AS route_nodes
LIMIT 100000
UNWIND route_nodes AS ns
WITH ns
WHERE properties(ns).exposure_line >= 0
  AND properties(ns).last_appl_state == 'PS'
RETURN DISTINCT ns;
# GO-based version of the lookup around '企业X', returning vertex ids in column `ids`.
# Branch 1: ids yielded as src(edge) while walking e_shareholder_invest in reverse for
#           1 to 2 steps, keeping edges with con_prop >= 50.
# Branch 2: the same reverse walk, piped into a forward walk (0 to 2 steps) from each
#           yielded id, yielding dst(edge) of edges with con_prop >= 50.
# NOTE(review): this relies on the pipe binding tighter than UNION, so the statement
# reads as branch1 UNION (reverse walk | forward walk) - confirm against the nGQL docs.
GO 1 TO 2 STEPS FROM '企业X' OVER e_shareholder_invest REVERSELY
    WHERE properties(edge).con_prop >= 50
    YIELD DISTINCT src(edge) AS ids
UNION
GO 1 TO 2 STEPS FROM '企业X' OVER e_shareholder_invest REVERSELY
    WHERE properties(edge).con_prop >= 50
    YIELD DISTINCT src(edge) AS holder_id
| GO 0 TO 2 STEPS FROM $-.holder_id OVER e_shareholder_invest
    WHERE properties(edge).con_prop >= 50
    YIELD DISTINCT dst(edge) AS ids;
建议考虑在应用层拆成多条 GO 语句去实现,先通过一个 1-2 跳的 GO 拿到所有的股东,将结果集保存下来,再针对结果集中的每个股东,发起新的 GO 语句,查询投资企业,最后将所有结果 UNION 起来。在你的这个场景里面,如果我们假设一家企业的投资人并不会经常发生变动,那么第一步查询 2 度内股东的这个结果集,就可以缓存起来,改为定时更新或者按事件触发更新,不需要每次重新查询,这样可以减少在线查询的工作量;如果我们假设一个投资人所投资的企业也不会经常发生变化,第二步的查询结果也可以缓存起来,按同样的方法处理。这样优化完,在线查询的代价会极低。考虑到缓存数据需要占用空间,可以考虑只针对比较大的企业和投资人做缓存,也就是比较大的节点。
建议这样改的原因如下:
首先,这个场景如果考虑用单条语句实现,从语义上是应该使用 MATCH 的。不过 MATCH 的性能在当前版本还面临比较大的挑战,下一个大版本会有质的提高。
其次,如果排除 MATCH,GO 语句本身难以提供一个等价于 MATCH 的查询功能。就像你也发现的,GO 的功能是游走一些步以后,返回终点相关的信息,抛弃掉中间过程上的信息,所以 GO 的性能也比较快。用多条 GO 可以拼凑一个等价于 MATCH 的功能,但语句比较难写;即便写出来了,也比较难调试和运维,所以建议在应用层拆解为多个 GO 去做。
# $var1: ids reached from '910306020230520108' over e_shareholder_invest in reverse,
# one hop (first branch) or two hops (second branch, chained with a pipe). Each hop is a
# separate single-step GO, so the con_prop >= 50 filter is applied on every hop.
$var1 = GO FROM '910306020230520108' OVER e_shareholder_invest REVERSELY
    WHERE properties(edge).con_prop >= 50
    YIELD DISTINCT src(edge) AS ids
UNION
GO FROM '910306020230520108' OVER e_shareholder_invest REVERSELY
    WHERE properties(edge).con_prop >= 50
    YIELD DISTINCT src(edge) AS first_hop_id
| GO FROM $-.first_hop_id OVER e_shareholder_invest REVERSELY
    WHERE properties(edge).con_prop >= 50
    YIELD DISTINCT src(edge) AS ids;
# $var2: the same ids with null entries removed.
$var2 = YIELD $var1.ids AS ids WHERE $var1.ids IS NOT NULL;
# $var3: union of three id sets, all exposed as column `ids`:
#   1. destinations one forward hop from the ids in $var2,
#   2. destinations two forward hops from them (two single-step GOs chained with a pipe),
#   3. the ids in $var2 themselves.
# Every hop keeps only e_shareholder_invest edges with con_prop >= 50 and a non-null
# destination.
$var3 = GO FROM $var2.ids OVER e_shareholder_invest
    WHERE properties(edge).con_prop >= 50 AND dst(edge) IS NOT NULL
    YIELD DISTINCT dst(edge) AS ids
UNION
GO FROM $var2.ids OVER e_shareholder_invest
    WHERE properties(edge).con_prop >= 50 AND dst(edge) IS NOT NULL
    YIELD DISTINCT dst(edge) AS first_hop_id
| GO FROM $-.first_hop_id OVER e_shareholder_invest
    WHERE properties(edge).con_prop >= 50 AND dst(edge) IS NOT NULL
    YIELD DISTINCT dst(edge) AS ids
UNION
YIELD $var2.ids AS ids;
# $var4: t_enterprise properties for every id in $var3. exposure_line is passed through
# when it is >= 0 and replaced by 0 otherwise (including when the comparison is not true,
# e.g. a missing value).
$var4 = FETCH PROP ON t_enterprise $var3.ids
    YIELD
        id(vertex) AS company_credit_code,
        properties(vertex).company_name AS company_name,
        properties(vertex).appl_no AS appl_no,
        CASE
            WHEN properties(vertex).exposure_line >= 0 THEN properties(vertex).exposure_line
            ELSE 0
        END AS exposure_line,
        properties(vertex).date_updated AS date_updated;
# $var5: the same columns, restricted to rows whose exposure_line is strictly positive.
$var5 = YIELD
        $var4.company_credit_code AS company_credit_code,
        $var4.company_name AS company_name,
        $var4.appl_no AS appl_no,
        $var4.exposure_line AS exposure_line,
        $var4.date_updated AS date_updated
    WHERE $var4.exposure_line > 0;