From 1d9bc57d8b90cb0e0a026d60cb1c3a0a84e70a7d Mon Sep 17 00:00:00 2001
From: Andy Hayden <andyhayden1@gmail.com>
Date: Sun, 17 Nov 2013 02:08:44 -0800
Subject: [PATCH 1/3] ENH nlargest and nsmallest Series methods

---
 doc/source/v0.13.1.txt      |  1 +
 pandas/algos.pyx            | 60 +++++++++++++++++++----------
 pandas/core/series.py       | 77 ++++++++++++++++++++++++++++++-------
 pandas/tests/test_series.py | 33 ++++++++++++++++
 pandas/tools/util.py        | 70 ++++++++++++++++++++++++++++++++-
 vb_suite/series_methods.py  | 29 ++++++++++++++
 6 files changed, 234 insertions(+), 36 deletions(-)
 create mode 100644 vb_suite/series_methods.py

diff --git a/doc/source/v0.13.1.txt b/doc/source/v0.13.1.txt
index b48f555f9691a..557abfc48a023 100644
--- a/doc/source/v0.13.1.txt
+++ b/doc/source/v0.13.1.txt
@@ -128,6 +128,7 @@ API changes
       import pandas.core.common as com
       com.array_equivalent(np.array([0, np.nan]), np.array([0, np.nan]))
       np.array_equal(np.array([0, np.nan]), np.array([0, np.nan]))
+- Add nsmallest and nlargest Series methods (:issue:`3960`)
 
 - ``DataFrame.apply`` will use the ``reduce`` argument to determine whether a
   ``Series`` or a ``DataFrame`` should be returned when the ``DataFrame`` is
diff --git a/pandas/algos.pyx b/pandas/algos.pyx
index 3b527740505e4..19a80c986af8e 100644
--- a/pandas/algos.pyx
+++ b/pandas/algos.pyx
@@ -21,6 +21,9 @@ from numpy cimport NPY_FLOAT16 as NPY_float16
 from numpy cimport NPY_FLOAT32 as NPY_float32
 from numpy cimport NPY_FLOAT64 as NPY_float64
 
+from numpy cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
+                    uint32_t, uint64_t, float16_t, float32_t, float64_t)
+
 int8 = np.dtype(np.int8)
 int16 = np.dtype(np.int16)
 int32 = np.dtype(np.int32)
@@ -736,16 +739,34 @@ def _check_minp(win, minp, N):
 # Physical description: 366 p.
 #               Series: Prentice-Hall Series in Automatic Computation
 
-def kth_smallest(ndarray[double_t] a, Py_ssize_t k):
-    cdef:
-        Py_ssize_t i,j,l,m,n
-        double_t x, t
+ctypedef fused kth_type:
+    int8_t
+    int16_t
+    int32_t
+    int64_t
 
-    n = len(a)
+    uint8_t
+    uint16_t
+    uint32_t
+    uint64_t
 
-    l = 0
-    m = n-1
-    while (l<m):
+    float32_t
+    float64_t
+
+
+cdef void swap_kth(kth_type *a, kth_type *b):
+    cdef kth_type t
+    t = a[0]
+    a[0] = b[0]
+    b[0] = t
+
+
+cpdef kth_type kth_smallest(kth_type[:] a, Py_ssize_t k):
+    cdef:
+        Py_ssize_t i, j, l = 0, n = a.size, m = n - 1
+        kth_type x
+
+    while l < m:
         x = a[k]
         i = l
         j = m
@@ -754,9 +775,7 @@ def kth_smallest(ndarray[double_t] a, Py_ssize_t k):
             while a[i] < x: i += 1
             while x < a[j]: j -= 1
             if i <= j:
-                t = a[i]
-                a[i] = a[j]
-                a[j] = t
+                swap_kth(&a[i], &a[j])
                 i += 1; j -= 1
 
             if i > j: break
@@ -765,6 +784,7 @@ def kth_smallest(ndarray[double_t] a, Py_ssize_t k):
         if k < i: m = j
     return a[k]
 
+
 cdef inline kth_smallest_c(float64_t* a, Py_ssize_t k, Py_ssize_t n):
     cdef:
         Py_ssize_t i,j,l,m
@@ -781,9 +801,7 @@ cdef inline kth_smallest_c(float64_t* a, Py_ssize_t k, Py_ssize_t n):
             while a[i] < x: i += 1
             while x < a[j]: j -= 1
             if i <= j:
-                t = a[i]
-                a[i] = a[j]
-                a[j] = t
+                swap_kth(&a[i], &a[j])
                 i += 1; j -= 1
 
             if i > j: break
@@ -793,22 +811,22 @@ cdef inline kth_smallest_c(float64_t* a, Py_ssize_t k, Py_ssize_t n):
     return a[k]
 
 
-def median(ndarray arr):
+cpdef kth_type median(kth_type[:] arr):
     '''
     A faster median
     '''
-    cdef int n = len(arr)
+    cdef Py_ssize_t n = arr.size
 
-    if len(arr) == 0:
+    if n == 0:
         return np.NaN
 
     arr = arr.copy()
 
     if n % 2:
-        return kth_smallest(arr, n / 2)
+        return kth_smallest(arr, n // 2)
     else:
-        return (kth_smallest(arr, n / 2) +
-                kth_smallest(arr, n / 2 - 1)) / 2
+        return (kth_smallest(arr, n // 2) +
+                kth_smallest(arr, n // 2 - 1)) / 2
 
 
 # -------------- Min, Max subsequence
@@ -2226,7 +2244,7 @@ cdef inline float64_t _median_linear(float64_t* a, int n):
 
 
     if n % 2:
-        result = kth_smallest_c(a, n / 2, n)
+        result = kth_smallest_c( a, n / 2, n)
     else:
         result = (kth_smallest_c(a, n / 2, n) +
                   kth_smallest_c(a, n / 2 - 1, n)) / 2
diff --git a/pandas/core/series.py b/pandas/core/series.py
index d95f8da8097e9..0151c55f91430 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -6,7 +6,6 @@
 # pylint: disable=E1101,E1103
 # pylint: disable=W0703,W0622,W0613,W0201
 
-import operator
 import types
 import warnings
 
@@ -15,21 +14,16 @@
 import numpy.ma as ma
 
 from pandas.core.common import (isnull, notnull, _is_bool_indexer,
-                                _default_index, _maybe_promote, _maybe_upcast,
-                                _asarray_tuplesafe, is_integer_dtype,
-                                _NS_DTYPE, _TD_DTYPE,
-                                _infer_dtype_from_scalar, is_list_like,
-                                _values_from_object,
+                                _default_index, _maybe_upcast,
+                                _asarray_tuplesafe, _infer_dtype_from_scalar,
+                                is_list_like, _values_from_object,
                                 _possibly_cast_to_datetime, _possibly_castable,
-                                _possibly_convert_platform,
-                                _try_sort,
+                                _possibly_convert_platform, _try_sort,
                                 ABCSparseArray, _maybe_match_name,
                                 _ensure_object, SettingWithCopyError)
 from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
                                _ensure_index)
-from pandas.core.indexing import (
-    _check_bool_indexer,
-    _is_index_slice, _maybe_convert_indices)
+from pandas.core.indexing import _check_bool_indexer, _maybe_convert_indices
 from pandas.core import generic, base
 from pandas.core.internals import SingleBlockManager
 from pandas.core.categorical import Categorical
@@ -37,7 +31,7 @@
 from pandas.tseries.period import PeriodIndex, Period
 from pandas import compat
 from pandas.util.terminal import get_terminal_size
-from pandas.compat import zip, lzip, u, OrderedDict
+from pandas.compat import zip, u, OrderedDict
 
 import pandas.core.array as pa
 import pandas.core.ops as ops
@@ -46,7 +40,7 @@
 import pandas.core.datetools as datetools
 import pandas.core.format as fmt
 import pandas.core.nanops as nanops
-from pandas.util.decorators import Appender, Substitution, cache_readonly
+from pandas.util.decorators import Appender, cache_readonly
 
 import pandas.lib as lib
 import pandas.tslib as tslib
@@ -1705,7 +1699,17 @@ def _try_kind_sort(arr):
         good = ~bad
         idx = pa.arange(len(self))
 
-        argsorted = _try_kind_sort(arr[good])
+        def _try_kind_sort(arr, kind='mergesort'):
+            # easier to ask forgiveness than permission
+            try:
+                # if kind==mergesort, it can fail for object dtype
+                return arr.argsort(kind=kind)
+            except TypeError:
+                # stable sort not available for object dtype
+                # uses the argsort default quicksort
+                return arr.argsort(kind='quicksort')
+
+        argsorted = _try_kind_sort(arr[good], kind=kind)
 
         if not ascending:
             argsorted = argsorted[::-1]
@@ -1728,6 +1732,51 @@ def _try_kind_sort(arr):
         else:
             return result.__finalize__(self)
 
+    def nlargest(self, n=5, take_last=False):
+        '''
+        Returns the largest n rows:
+
+        May be faster than .order(ascending=False).head(n).
+
+        '''
+        # TODO remove need for dropna ?
+        dropped = self.dropna()
+
+        from pandas.tools.util import nlargest
+
+        if dropped.dtype == object:
+            try:
+                dropped = dropped.astype(float)
+            except (NotImplementedError, TypeError):
+                return dropped.order(ascending=False).head(n)
+
+        inds = nlargest(dropped.values, n, take_last)
+        if len(inds) == 0:
+            # TODO remove this special case
+            return dropped[[]]
+        return dropped.iloc[inds]
+
+    def nsmallest(self, n=5, take_last=False):
+        '''
+        Returns the smallest n rows.
+
+        May be faster than .order().head(n).
+
+        '''
+        # TODO remove need for dropna ?
+        dropped = self.dropna()
+
+        from pandas.tools.util import nsmallest
+        try:
+            inds = nsmallest(dropped.values, n, take_last)
+        except NotImplementedError:
+            return dropped.order().head(n)
+
+        if len(inds) == 0:
+            # TODO remove this special case
+            return dropped[[]]
+        return dropped.iloc[inds]
+
     def sortlevel(self, level=0, ascending=True, sort_remaining=True):
         """
         Sort Series with MultiIndex by chosen level. Data will be
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
index 6e7c9edfc4025..e13a827cc8d00 100644
--- a/pandas/tests/test_series.py
+++ b/pandas/tests/test_series.py
@@ -3998,6 +3998,39 @@ def test_order(self):
         ordered = ts.order(ascending=False, na_position='first')
         assert_almost_equal(expected, ordered.valid().values)
 
+    def test_nsmallest_nlargest(self):
+        # float, int, datetime64 (use i8), timedelts64 (same),
+        # object that are numbers, object that are strings
+
+        s_list = [Series([3, 2, 1, 2, 5]),
+                  Series([3., 2., 1., 2., 5.]),
+                  Series([3., 2, 1, 2, 5], dtype='object'),
+                  Series([3., 2, 1, 2, '5'], dtype='object'),
+                  Series(pd.to_datetime(['2003', '2002', '2001', '2002', '2005']))]
+
+        for s in s_list:
+
+            assert_series_equal(s.nsmallest(2), s.iloc[[2, 1]])
+            assert_series_equal(s.nsmallest(2, take_last=True), s.iloc[[2, 3]])
+
+            assert_series_equal(s.nlargest(3), s.iloc[[4, 0, 1]])
+            assert_series_equal(s.nlargest(3, take_last=True), s.iloc[[4, 0, 3]])
+
+            empty = s.iloc[0:0]
+            assert_series_equal(s.nsmallest(0), empty)
+            assert_series_equal(s.nsmallest(-1), empty)
+            assert_series_equal(s.nlargest(0), empty)
+            assert_series_equal(s.nlargest(-1), empty)
+
+            assert_series_equal(s.nsmallest(len(s)), s.order())
+            assert_series_equal(s.nsmallest(len(s) + 1), s.order())
+            assert_series_equal(s.nlargest(len(s)), s.iloc[[4, 0, 1, 3, 2]])
+            assert_series_equal(s.nlargest(len(s) + 1), s.iloc[[4, 0, 1, 3, 2]])
+
+        s = Series([3., np.nan, 1, 2, 5])
+        assert_series_equal(s.nlargest(), s.iloc[[4, 0, 3, 2]])
+        assert_series_equal(s.nsmallest(), s.iloc[[2, 3, 0, 4]])
+
     def test_rank(self):
         from pandas.compat.scipy import rankdata
 
diff --git a/pandas/tools/util.py b/pandas/tools/util.py
index 6dbefc4b70930..6a8cd0e358f87 100644
--- a/pandas/tools/util.py
+++ b/pandas/tools/util.py
@@ -1,6 +1,9 @@
 from pandas.compat import reduce
 from pandas.core.index import Index
 import numpy as np
+from pandas import algos
+import pandas.core.common as com
+
 
 def match(needles, haystack):
     haystack = Index(haystack)
@@ -17,7 +20,7 @@ def cartesian_product(X):
     --------
     >>> cartesian_product([list('ABC'), [1, 2]])
     [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='|S1'),
- 	array([1, 2, 1, 2, 1, 2])]
+    array([1, 2, 1, 2, 1, 2])]
 
     '''
 
@@ -43,3 +46,68 @@ def compose(*funcs):
     """Compose 2 or more callables"""
     assert len(funcs) > 1, 'At least 2 callables must be passed to compose'
     return reduce(_compose2, funcs)
+
+
+_dtype_map = {'datetime64[ns]': 'int64', 'int64': 'int64',
+              'float64': 'float64'}
+
+
+def nsmallest(arr, n=5, take_last=False):
+    '''
+    Find the indices of the n smallest values of a numpy array.
+
+    Note: Fails silently with NaN.
+
+    '''
+    if n <= 0:
+        return np.array([])  # empty
+    elif n >= len(arr):
+        n = len(arr)
+
+    if arr.dtype == object:
+        # just sort and take n
+        return arr.argsort(kind='mergesort')[:n]
+
+    try:
+        dtype = _dtype_map[str(arr.dtype)]
+    except KeyError:
+        raise NotImplementedError("Not implemented for %s dtype, "
+                                  "perhaps convert to int64 or float64, "
+                                  "or use .order().head(n)") % arr.dtype
+
+    arr = arr.view(dtype)
+
+    if take_last:
+        arr = arr[::-1]
+
+    kth_val = algos.kth_smallest(arr.copy(), n - 1)
+
+    ns, = np.nonzero(arr <= kth_val)
+    inds = ns[arr[ns].argsort(kind='mergesort')][:n]
+
+    if take_last:
+        # reverse indices
+        return len(arr) - 1 - inds
+    return inds
+
+
+def nlargest(arr, n=5, take_last=False):
+    '''
+    Find the indices of the n largest values of a numpy array.
+
+    Note: Fails silently with NaN.
+
+    '''
+    if n <= 0:
+        return np.array([])  # empty
+
+    n = min(n, len(arr))
+
+    if arr.dtype == object:
+        try:
+            arr = arr.astype(float)
+        except:
+            raise TypeError("An object array must convert to float.")
+
+    arr = -arr.view(_dtype_map[str(arr.dtype)])
+    return nsmallest(arr, n, take_last=take_last)
diff --git a/vb_suite/series_methods.py b/vb_suite/series_methods.py
new file mode 100644
index 0000000000000..1659340cfe050
--- /dev/null
+++ b/vb_suite/series_methods.py
@@ -0,0 +1,29 @@
+from vbench.api import Benchmark
+from datetime import datetime
+
+common_setup = """from pandas_vb_common import *
+"""
+
+setup = common_setup + """
+s1 = Series(np.random.randn(10000))
+s2 = Series(np.random.randint(1, 10, 10000))
+"""
+
+series_nlargest1 = Benchmark('s1.nlargest(3, take_last=True);'
+                             's1.nlargest(3, take_last=False)',
+                             setup,
+                             start_date=datetime(2014, 1, 25))
+series_nlargest2 = Benchmark('s2.nlargest(3, take_last=True);'
+                             's2.nlargest(3, take_last=False)',
+                             setup,
+                             start_date=datetime(2014, 1, 25))
+
+series_nsmallest1 = Benchmark('s1.nsmallest(3, take_last=True);'
+                              's1.nsmallest(3, take_last=False)',
+                              setup,
+                              start_date=datetime(2014, 1, 25))
+
+series_nsmallest2 = Benchmark('s2.nsmallest(3, take_last=True);'
+                              's2.nsmallest(3, take_last=False)',
+                              setup,
+                              start_date=datetime(2014, 1, 25))

From a909e1649bb947736c3bcac8829af3ba3532a4a3 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <cpcloud@gmail.com>
Date: Mon, 12 May 2014 09:21:13 -0400
Subject: [PATCH 2/3] DOC/REF: add docstrings and DRY it up

---
 doc/source/v0.13.1.txt      |   1 -
 doc/source/v0.14.0.txt      |   1 +
 pandas/algos.pyx            |  27 ++++++----
 pandas/core/algorithms.py   |  88 +++++++++++++++++++++++++++++--
 pandas/core/series.py       | 100 ++++++++++++++++++++----------------
 pandas/tests/test_series.py |  43 +++++++++++++---
 pandas/tools/util.py        |  67 +-----------------------
 7 files changed, 197 insertions(+), 130 deletions(-)

diff --git a/doc/source/v0.13.1.txt b/doc/source/v0.13.1.txt
index 557abfc48a023..b48f555f9691a 100644
--- a/doc/source/v0.13.1.txt
+++ b/doc/source/v0.13.1.txt
@@ -128,7 +128,6 @@ API changes
       import pandas.core.common as com
       com.array_equivalent(np.array([0, np.nan]), np.array([0, np.nan]))
       np.array_equal(np.array([0, np.nan]), np.array([0, np.nan]))
-- Add nsmallest and nlargest Series methods (:issue:`3960`)
 
 - ``DataFrame.apply`` will use the ``reduce`` argument to determine whether a
   ``Series`` or a ``DataFrame`` should be returned when the ``DataFrame`` is
diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt
index 8182bff92fb63..c033debbb6808 100644
--- a/doc/source/v0.14.0.txt
+++ b/doc/source/v0.14.0.txt
@@ -643,6 +643,7 @@ Enhancements
                 values='Quantity', aggfunc=np.sum)
 
 - str.wrap implemented (:issue:`6999`)
+- Add nsmallest and nlargest Series methods (:issue:`3960`)
 
 - `PeriodIndex` fully supports partial string indexing like `DatetimeIndex` (:issue:`7043`)
 
diff --git a/pandas/algos.pyx b/pandas/algos.pyx
index 19a80c986af8e..431ef97debae6 100644
--- a/pandas/algos.pyx
+++ b/pandas/algos.pyx
@@ -739,7 +739,8 @@ def _check_minp(win, minp, N):
 # Physical description: 366 p.
 #               Series: Prentice-Hall Series in Automatic Computation
 
-ctypedef fused kth_type:
+
+ctypedef fused numeric:
     int8_t
     int16_t
     int32_t
@@ -754,17 +755,25 @@ ctypedef fused kth_type:
     float64_t
 
 
-cdef void swap_kth(kth_type *a, kth_type *b):
-    cdef kth_type t
+cdef inline Py_ssize_t swap(numeric *a, numeric *b) except -1:
+    cdef numeric t
+
+    # cython doesn't allow pointer dereference so use array syntax
     t = a[0]
     a[0] = b[0]
     b[0] = t
+    return 0
 
 
-cpdef kth_type kth_smallest(kth_type[:] a, Py_ssize_t k):
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k):
     cdef:
-        Py_ssize_t i, j, l = 0, n = a.size, m = n - 1
-        kth_type x
+        Py_ssize_t i, j, l, m, n = a.size
+        numeric x
+
+    l = 0
+    m = n - 1
 
     while l < m:
         x = a[k]
@@ -775,7 +784,7 @@ cpdef kth_type kth_smallest(kth_type[:] a, Py_ssize_t k):
             while a[i] < x: i += 1
             while x < a[j]: j -= 1
             if i <= j:
-                swap_kth(&a[i], &a[j])
+                swap(&a[i], &a[j])
                 i += 1; j -= 1
 
             if i > j: break
@@ -801,7 +810,7 @@ cdef inline kth_smallest_c(float64_t* a, Py_ssize_t k, Py_ssize_t n):
             while a[i] < x: i += 1
             while x < a[j]: j -= 1
             if i <= j:
-                swap_kth(&a[i], &a[j])
+                swap(&a[i], &a[j])
                 i += 1; j -= 1
 
             if i > j: break
@@ -811,7 +820,7 @@ cdef inline kth_smallest_c(float64_t* a, Py_ssize_t k, Py_ssize_t n):
     return a[k]
 
 
-cpdef kth_type median(kth_type[:] arr):
+cpdef numeric median(numeric[:] arr):
     '''
     A faster median
     '''
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 002d5480b9b7b..954f18ccb69b8 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -9,9 +9,7 @@
 import pandas.core.common as com
 import pandas.algos as algos
 import pandas.hashtable as htable
-import pandas.compat as compat
-from pandas.compat import filter, string_types
-from pandas.util.decorators import deprecate_kwarg
+from pandas.compat import string_types
 
 def match(to_match, values, na_sentinel=-1):
     """
@@ -413,6 +411,90 @@ def group_position(*args):
     return result
 
 
+_dtype_map = {'datetime64[ns]': 'int64', 'timedelta64[ns]': 'int64'}
+
+
+def _finalize_nsmallest(arr, kth_val, n, take_last, narr):
+    ns, = np.nonzero(arr <= kth_val)
+    inds = ns[arr[ns].argsort(kind='mergesort')][:n]
+
+    if take_last:
+        # reverse indices
+        return narr - 1 - inds
+    return inds
+
+
+def nsmallest(arr, n, take_last=False):
+    '''
+    Find the indices of the n smallest values of a numpy array.
+
+    Note: Fails silently with NaN.
+
+    '''
+    if take_last:
+        arr = arr[::-1]
+
+    narr = len(arr)
+    n = min(n, narr)
+
+    sdtype = str(arr.dtype)
+    arr = arr.view(_dtype_map.get(sdtype, sdtype))
+
+    kth_val = algos.kth_smallest(arr.copy(), n - 1)
+    return _finalize_nsmallest(arr, kth_val, n, take_last, narr)
+
+
+def nlargest(arr, n, take_last=False):
+    """
+    Find the indices of the n largest values of a numpy array.
+
+    Note: Fails silently with NaN.
+    """
+    sdtype = str(arr.dtype)
+    arr = arr.view(_dtype_map.get(sdtype, sdtype))
+    return nsmallest(-arr, n, take_last=take_last)
+
+
+def select_n_slow(dropped, n, take_last, method):
+    reverse_it = take_last or method == 'nlargest'
+    ascending = method == 'nsmallest'
+    slc = np.s_[::-1] if reverse_it else np.s_[:]
+    return dropped[slc].order(ascending=ascending).head(n)
+
+
+_select_methods = {'nsmallest': nsmallest, 'nlargest': nlargest}
+
+
+def select_n(series, n, take_last, method):
+    """Implement n largest/smallest.
+
+    Parameters
+    ----------
+    n : int
+    take_last : bool
+    method : str, {'nlargest', 'nsmallest'}
+
+    Returns
+    -------
+    nordered : Series
+    """
+    dtype = series.dtype
+    if not issubclass(dtype.type, (np.integer, np.floating, np.datetime64,
+                                   np.timedelta64)):
+        raise TypeError("Cannot use method %r with dtype %s" % (method, dtype))
+
+    if n <= 0:
+        return series[[]]
+
+    dropped = series.dropna()
+    # all-NaN input leaves nothing for kth_smallest to index; sort instead
+    if n >= len(series) or not len(dropped):
+        return select_n_slow(dropped, n, take_last, method)
+
+    inds = _select_methods[method](dropped.values, n, take_last)
+    return dropped.iloc[inds]
+
+
 _rank1d_functions = {
     'float64': algos.rank_1d_float64,
     'int64': algos.rank_1d_int64,
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 0151c55f91430..1637ba49d86a2 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -35,6 +35,7 @@
 
 import pandas.core.array as pa
 import pandas.core.ops as ops
+from pandas.core.algorithms import select_n
 
 import pandas.core.common as com
 import pandas.core.datetools as datetools
@@ -1699,17 +1700,7 @@ def _try_kind_sort(arr):
         good = ~bad
         idx = pa.arange(len(self))
 
-        def _try_kind_sort(arr, kind='mergesort'):
-            # easier to ask forgiveness than permission
-            try:
-                # if kind==mergesort, it can fail for object dtype
-                return arr.argsort(kind=kind)
-            except TypeError:
-                # stable sort not available for object dtype
-                # uses the argsort default quicksort
-                return arr.argsort(kind='quicksort')
-
-        argsorted = _try_kind_sort(arr[good], kind=kind)
+        argsorted = _try_kind_sort(arr[good])
 
         if not ascending:
             argsorted = argsorted[::-1]
@@ -1733,49 +1724,70 @@ def _try_kind_sort(arr, kind='mergesort'):
             return result.__finalize__(self)
 
     def nlargest(self, n=5, take_last=False):
-        '''
-        Returns the largest n rows:
+        """Return the largest `n` elements.
 
-        May be faster than .order(ascending=False).head(n).
+        Parameters
+        ----------
+        n : int
+            Return this many descending sorted values
+        take_last : bool
+            Where there are duplicate values, take the last duplicate
 
-        '''
-        # TODO remove need for dropna ?
-        dropped = self.dropna()
+        Returns
+        -------
+        top_n : Series
+            The n largest values in the Series, in sorted order
 
-        from pandas.tools.util import nlargest
+        Notes
+        -----
+        Faster than ``.order(ascending=False).head(n)`` for small `n` relative
+        to the size of the ``Series`` object.
 
-        if dropped.dtype == object:
-            try:
-                dropped = dropped.astype(float)
-            except (NotImplementedError, TypeError):
-                return dropped.order(ascending=False).head(n)
+        See Also
+        --------
+        Series.nsmallest
 
-        inds = nlargest(dropped.values, n, take_last)
-        if len(inds) == 0:
-            # TODO remove this special case
-            return dropped[[]]
-        return dropped.iloc[inds]
+        Examples
+        --------
+        >>> import pandas as pd
+        >>> import numpy as np
+        >>> s = pd.Series(np.random.randn(10 ** 6))
+        >>> s.nlargest(10)  # only sorts up to the N requested
+        """
+        return select_n(self, n=n, take_last=take_last, method='nlargest')
 
     def nsmallest(self, n=5, take_last=False):
-        '''
-        Returns the smallest n rows.
+        """Return the smallest `n` elements.
 
-        May be faster than .order().head(n).
+        Parameters
+        ----------
+        n : int
+            Return this many ascending sorted values
+        take_last : bool
+            Where there are duplicate values, take the last duplicate
 
-        '''
-        # TODO remove need for dropna ?
-        dropped = self.dropna()
+        Returns
+        -------
+        bottom_n : Series
+            The n smallest values in the Series, in sorted order
 
-        from pandas.tools.util import nsmallest
-        try:
-            inds = nsmallest(dropped.values, n, take_last)
-        except NotImplementedError:
-            return dropped.order().head(n)
-
-        if len(inds) == 0:
-            # TODO remove this special case
-            return dropped[[]]
-        return dropped.iloc[inds]
+        Notes
+        -----
+        Faster than ``.order().head(n)`` for small `n` relative to
+        the size of the ``Series`` object.
+
+        See Also
+        --------
+        Series.nlargest
+
+        Examples
+        --------
+        >>> import pandas as pd
+        >>> import numpy as np
+        >>> s = pd.Series(np.random.randn(10 ** 6))
+        >>> s.nsmallest(10)  # only sorts up to the N requested
+        """
+        return select_n(self, n=n, take_last=take_last, method='nsmallest')
 
     def sortlevel(self, level=0, ascending=True, sort_remaining=True):
         """
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
index e13a827cc8d00..434c21bfa76de 100644
--- a/pandas/tests/test_series.py
+++ b/pandas/tests/test_series.py
@@ -4002,11 +4002,38 @@ def test_nsmallest_nlargest(self):
         # float, int, datetime64 (use i8), timedelts64 (same),
         # object that are numbers, object that are strings
 
-        s_list = [Series([3, 2, 1, 2, 5]),
-                  Series([3., 2., 1., 2., 5.]),
-                  Series([3., 2, 1, 2, 5], dtype='object'),
-                  Series([3., 2, 1, 2, '5'], dtype='object'),
-                  Series(pd.to_datetime(['2003', '2002', '2001', '2002', '2005']))]
+        base = [3, 2, 1, 2, 5]
+
+        s_list = [
+            Series(base, dtype='int8'),
+            Series(base, dtype='int16'),
+            Series(base, dtype='int32'),
+            Series(base, dtype='int64'),
+            Series(base, dtype='float32'),
+            Series(base, dtype='float64'),
+            Series(base, dtype='uint8'),
+            Series(base, dtype='uint16'),
+            Series(base, dtype='uint32'),
+            Series(base, dtype='uint64'),
+            Series(base).astype('timedelta64[ns]'),
+            Series(pd.to_datetime(['2003', '2002', '2001', '2002', '2005'])),
+        ]
+
+        raising = [
+            Series([3., 2, 1, 2, '5'], dtype='object'),
+            Series([3., 2, 1, 2, 5], dtype='object'),
+            Series([3., 2, 1, 2, 5], dtype='complex256'),
+            Series([3., 2, 1, 2, 5], dtype='complex128'),
+        ]
+
+        for r in raising:
+            dt = r.dtype
+            msg = "Cannot use method 'n(larg|small)est' with dtype %s" % dt
+            args = 2, len(r), 0, -1
+            methods = r.nlargest, r.nsmallest
+            for method, arg in product(methods, args):
+                with tm.assertRaisesRegexp(TypeError, msg):
+                    method(arg)
 
         for s in s_list:
 
@@ -4014,7 +4041,8 @@ def test_nsmallest_nlargest(self):
             assert_series_equal(s.nsmallest(2, take_last=True), s.iloc[[2, 3]])
 
             assert_series_equal(s.nlargest(3), s.iloc[[4, 0, 1]])
-            assert_series_equal(s.nlargest(3, take_last=True), s.iloc[[4, 0, 3]])
+            assert_series_equal(s.nlargest(3, take_last=True),
+                                s.iloc[[4, 0, 3]])
 
             empty = s.iloc[0:0]
             assert_series_equal(s.nsmallest(0), empty)
@@ -4025,7 +4053,8 @@ def test_nsmallest_nlargest(self):
             assert_series_equal(s.nsmallest(len(s)), s.order())
             assert_series_equal(s.nsmallest(len(s) + 1), s.order())
             assert_series_equal(s.nlargest(len(s)), s.iloc[[4, 0, 1, 3, 2]])
-            assert_series_equal(s.nlargest(len(s) + 1), s.iloc[[4, 0, 1, 3, 2]])
+            assert_series_equal(s.nlargest(len(s) + 1),
+                                s.iloc[[4, 0, 1, 3, 2]])
 
         s = Series([3., np.nan, 1, 2, 5])
         assert_series_equal(s.nlargest(), s.iloc[[4, 0, 3, 2]])
diff --git a/pandas/tools/util.py b/pandas/tools/util.py
index 6a8cd0e358f87..1d6ed3e11c81e 100644
--- a/pandas/tools/util.py
+++ b/pandas/tools/util.py
@@ -1,8 +1,8 @@
+import operator
 from pandas.compat import reduce
 from pandas.core.index import Index
 import numpy as np
 from pandas import algos
-import pandas.core.common as com
 
 
 def match(needles, haystack):
@@ -46,68 +46,3 @@ def compose(*funcs):
     """Compose 2 or more callables"""
     assert len(funcs) > 1, 'At least 2 callables must be passed to compose'
     return reduce(_compose2, funcs)
-
-
-_dtype_map = {'datetime64[ns]': 'int64', 'int64': 'int64',
-              'float64': 'float64'}
-
-
-def nsmallest(arr, n=5, take_last=False):
-    '''
-    Find the indices of the n smallest values of a numpy array.
-
-    Note: Fails silently with NaN.
-
-    '''
-    if n <= 0:
-        return np.array([])  # empty
-    elif n >= len(arr):
-        n = len(arr)
-
-    if arr.dtype == object:
-        # just sort and take n
-        return arr.argsort(kind='mergesort')[:n]
-
-    try:
-        dtype = _dtype_map[str(arr.dtype)]
-    except KeyError:
-        raise NotImplementedError("Not implemented for %s dtype, "
-                                  "perhaps convert to int64 or float64, "
-                                  "or use .order().head(n)") % arr.dtype
-
-    arr = arr.view(dtype)
-
-    if take_last:
-        arr = arr[::-1]
-
-    kth_val = algos.kth_smallest(arr.copy(), n - 1)
-
-    ns, = np.nonzero(arr <= kth_val)
-    inds = ns[arr[ns].argsort(kind='mergesort')][:n]
-
-    if take_last:
-        # reverse indices
-        return len(arr) - 1 - inds
-    return inds
-
-
-def nlargest(arr, n=5, take_last=False):
-    '''
-    Find the indices of the n largest values of a numpy array.
-
-    Note: Fails silently with NaN.
-
-    '''
-    if n <= 0:
-        return np.array([])  # empty
-
-    n = min(n, len(arr))
-
-    if arr.dtype == object:
-        try:
-            arr = arr.astype(float)
-        except:
-            raise TypeError("An object array must convert to float.")
-
-    arr = -arr.view(_dtype_map[str(arr.dtype)])
-    return nsmallest(arr, n, take_last=take_last)

From 66737055d3fdc4e8cc13505b7ac9f565aae69765 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <cpcloud@gmail.com>
Date: Wed, 14 May 2014 16:27:55 -0400
Subject: [PATCH 3/3] DOC: doc blurb in basics.rst

---
 doc/source/basics.rst | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/doc/source/basics.rst b/doc/source/basics.rst
index ca9751569336c..5aa84f46debea 100644
--- a/doc/source/basics.rst
+++ b/doc/source/basics.rst
@@ -1311,6 +1311,21 @@ Some other sorting notes / nuances:
     compatibility with NumPy methods which expect the ``ndarray.sort``
     behavior.
 
+.. versionadded:: 0.14.0
+
+``Series`` has the ``nsmallest`` and ``nlargest`` methods which return the
+smallest or largest :math:`n` values. For a large ``Series`` this can be much
+faster than sorting the entire Series and calling ``head(n)`` on the result.
+
+.. ipython:: python
+
+   s = Series(np.random.permutation(10))
+   s
+   s.order()
+   s.nsmallest(3)
+   s.nlargest(3)
+
+
 Sorting by a multi-index column
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~