@@ -372,8 +372,8 @@ In pandas, you can use :meth:`~pandas.concat` in conjunction with
372
372
373
373
pd.concat([df1, df2]).drop_duplicates()
374
374
375
- SOME ANALYTIC AND AGGREGATE FUNCTIONS
376
- -------------------------------------
375
+ Pandas equivalents for some SQL analytic and aggregate functions
376
+ ----------------------------------------------------------------
377
377
Top N rows with offset
378
378
379
379
.. code-block:: sql
@@ -383,16 +383,11 @@ Top N rows with offset
383
383
ORDER BY tip DESC
384
384
LIMIT 10 OFFSET 5;
385
385
386
- -- Oracle 12c+
387
- SELECT * FROM tips
388
- ORDER BY tip DESC
389
- OFFSET 5 ROWS FETCH NEXT 10 ROWS ONLY;
390
-
391
386
In pandas:
392
387
393
388
.. ipython:: python
394
389
395
- tips.sort_values([ ' tip ' ], ascending = False ).head( 10 + 5 ).tail(10 )
390
+ tips.nlargest(10 + 5, columns='tip').tail(10)
396
391
397
392
Top N rows per group
398
393
@@ -428,31 +423,9 @@ the same using `rank(method='first')` function
428
423
tips['rnk'] = tips.groupby(['day'])['total_bill'].rank(method='first', ascending=False)
429
424
tips.loc[tips['rnk'] < 3].sort_values(['day', 'rnk'])
430
425
431
- Top second and top third total bills per day
432
-
433
- .. code-block :: sql
434
-
435
- -- Oracle
436
- SELECT * FROM (
437
- SELECT
438
- t.*,
439
- ROW_NUMBER() OVER(PARTITION BY day ORDER BY total_bill DESC) AS rn
440
- FROM tips t
441
- )
442
- WHERE rn BETWEEN 2 and 3
443
- ORDER BY day, rn;
444
-
445
- .. ipython :: python
446
-
447
- tips[' rn' ] = tips.sort_values([' total_bill' ], ascending = False ) \
448
- .groupby([' day' ]) \
449
- .cumcount() + 1
450
- tips.loc[tips[' rn' ].between(2 , 3 )].sort_values([' day' ,' rn' ])
451
-
452
-
453
426
.. code-block:: sql
454
427
455
- -- Oracle
428
+ -- Oracle's RANK() analytic function
456
429
SELECT * FROM (
457
430
SELECT
458
431
t.*,
0 commit comments