In [1]:
%%capture
# load the magic extension and imports
%reload_ext nextcode
import nextcode
import pandas as pd
import GOR_query_helper as GQH
# Set LOG_QUERY=1 in the kernel's environment.
# NOTE(review): presumably this makes the GOR tooling log each query - confirm.
%env LOG_QUERY=1

# Project name used by this notebook; exported below as GOR_API_PROJECT
# (IPython expands {project} before %env runs).
# NOTE(review): presumably the nextcode extension reads this variable to pick
# the project that queries run against - confirm.
project = "matthew_sampson_hg38"
%env GOR_API_PROJECT={project}

Basic addition, deletion, and overwriting of definitions.¶

In [2]:
# Create a helper that accumulates GOR `def` / `create` statements; the first
# printout shows it holds only what is added below.
qh = GQH.GOR_Query_Helper()
# Register one `def` and one `create` statement (both already end in ';').
qh.add("def #d1# = 2;")
qh.add("create #c1# = norrows 2 | calc rand random();")
print(qh.defs())
print('Now delete')
# Remove the definition by name; the printout shows only the create remains.
qh.delete('#d1#')
print(qh.defs())
print('Now add twice to demonstrate overwrite and now without a semicolon')
# Adding the same name twice keeps only the latest statement. In the recorded
# output it is stored as "def #d1# = 1+1;" - trailing blanks dropped and a ';'
# appended - and it is listed after the create statement.
qh.add("def #d1# = 1  ")
qh.add("def #d1# = 1+1  ")
print(qh.defs())
def #d1# = 2;
create #c1# = norrows 2 | calc rand random();

Now delete
create #c1# = norrows 2 | calc rand random();

Now add twice to demonstrate overwrite and now without a semicolon
create #c1# = norrows 2 | calc rand random();
def #d1# = 1+1;

Clone multiple queries over to a new query helper, e.g. to work on another branch of the query.¶

In [3]:
# Build a second helper and seed it with the statements currently held by `qh`.
qh2 = GQH.GOR_Query_Helper()
# add_many() is handed the full text returned by qh.defs(); the printout below
# shows both statements arriving in qh2.
qh2.add_many(qh.defs())
print('Here are the cloned defs.')
print(qh2.defs())
print('Now add anoter create statement')
# add() returns the helper's updated definitions text (see the printed value),
# which now ends with the new `c2` create. Only qh2 changes: a later cell
# prints qh.defs() without `c2`.
mydefs = qh2.add('create c2 = gor #genes# | top 2')
print(mydefs)
Here are the cloned defs.
create #c1# = norrows 2 | calc rand random();
def #d1# = 1+1;

Now add anoter create statement
create #c1# = norrows 2 | calc rand random();
def #d1# = 1+1;
create c2 = gor #genes# | top 2;

Use the definitions to run a query using the GOR magic-syntax.¶

In [4]:
# Rebind `mydefs` to the definitions of the first helper (`qh`) so the %%gor
# cell that follows can splice them in via `$mydefs`.
mydefs = qh.defs()
print(mydefs)
create #c1# = norrows 2 | calc rand random();
def #d1# = 1+1;

In [5]:
%%gor
$mydefs
def #d2# = 2;
norrows 2 | calc d1 #d1# | calc d2 #d2#
Query ran in 0.27 sec
Query fetched 2 rows in 0.01 sec (total time 0.29 sec)
                                             
Out[5]:
RowNum d1 d2
0 0 2 2
1 1 2 2
In [8]:
# Capture the definitions of the cloned helper (`qh2`, which also holds `c2`)
# for use as `$mydefs2` in the next %%gor cell.
mydefs2 = qh2.defs()
In [9]:
%%gor mypandaframe <<
$mydefs2
gor [c2] | calc dummy #d1# | multimap -cartesian [#c1#]
Query ran in 0.09 sec
Query fetched 4 rows in 0.02 sec (total time 0.11 sec)
                                             
In [10]:
# Summary statistics of the frame filled by the preceding
# `%%gor mypandaframe <<` cell (assumed to be a pandas DataFrame, as the
# rendered output suggests).
mypandaframe.describe()
Out[10]:
gene_start gene_end dummy RowNum rand
count 4.000000 4.000000 4.0 4.00000 4.000000
mean 13135.500000 21989.500000 2.0 0.50000 0.203145
std 1463.582932 8753.207431 0.0 0.57735 0.010190
min 11868.000000 14409.000000 2.0 0.00000 0.194320
25% 11868.000000 14409.000000 2.0 0.00000 0.194320
50% 13135.500000 21989.500000 2.0 0.50000 0.203145
75% 14403.000000 29570.000000 2.0 1.00000 0.211970
max 14403.000000 29570.000000 2.0 1.00000 0.211970
In [ ]:
# Show a bounded preview rather than the whole frame, so the cell stays
# readable if the query is later changed to return many rows.
mypandaframe.head()
In [11]:
# Obtain a handle to the "queryserver" service through the nextcode SDK.
# The `nextcode` module is imported with the other imports in the first cell,
# so the notebook's dependencies are all declared in one place.
qs = nextcode.get_service("queryserver")
In [12]:
# Assemble the complete query text: the helper's definitions first, then a
# newline and the final NOR statement that reads the [#c1#] create.
myquery = "{}\nnor [#c1#]".format(qh.defs())
print(myquery)
create #c1# = norrows 2 | calc rand random();
def #d1# = 1+1;

nor [#c1#]
In [13]:
# Run the assembled query text through the query-server handle and display
# the result as a DataFrame.
qs.execute(myquery).dataframe()
Out[13]:
RowNum rand
0 0 0.21197
1 1 0.19432
In [ ]: