diff --git a/.travis.yml b/.travis.yml index 8e2bb49d9df93..30f09deefd93a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,14 +5,6 @@ python: env: global: - - secure: "O04RU5QRKEDL/SrIWEsVe8O+1TxZqZQSa28Sd+Fz48NW/XddhefYyxzqcUXh\nk/NjWMqknJRQhApLolBianVpsE577OTllzlcyKn3nUL6hjOXcoszGaYray7S\niNGKGyO8xrtB/ZQDtmupz0ksK8sLoCTscdiGotFulczbx0zt+4g=" - - secure: "PUJ9nC1/v2vpFUtELSoSjI53OHCVXfFTb8+t5lIGIqHtjUBkhiJSNPfCv8Bx\ndsdrx30qP8KsSceYzaa/bog6p8YNU1iih23S0KbjucutvA0LNHBTNvnxmjBR\nSJfKd5FmwnXvizRyghYBzmQ3NmGO7ADw2DBwKOhgGMqCHZ8Tlc8=" - - secure: "IDcMrCCW+6pgJtsI3Q163OPc0iec1ogpitaqiRhHcrEBUCXZgVeclOeiZBlw\n/u+uGyW/O0NhHMaFXKB8BdDVwlQEEHv48syN6npS/A5+O6jriWKL4ozttOhE\npOlu+yLhHnEwx6wZVIHRTVn+t1GkOrjlBcjaQi+Z13G3XmDaSG8=" - - secure: "Zu9aj0dTGpvMqT/HqBGQgDYl/v5ubC7lFwfE8Fqb0N1UVXqbpjXnNH/7oal1\nUsIT7klO++LWm+LxsP/A1FWENTSgdYe99JQtNyauW+0x5YR1JTuDJ8atDgx9\nSq66CaVpS5t+ov7UVm2bKSUX+1S8+8zGbIDADrMxEzYEMF7WoGM=" - - secure: "AfIvLxvCxj22zrqg3ejGf/VePKT2AyGT9erYzlKpBS0H8yi5Pp1MfmJjhaR4\n51zBtzqHPHiIEY6ZdE06o9PioMWkXS+BqJNrxGSbt1ltxgOFrxW5zOpwiFGZ\nZOv1YeFkuPf8PEsWT7615mdydqTQT7B0pqUKK/d6aka4TQ/tg5Q=" - - secure: "EM4ySBUusReNu7H1QHXvjnP/J1QowvfpwEBmjysYxJuq7KcG8HhhlfpUF+Gh\nLBzLak9QBA67k4edhum3qtKuJR5cHuja3+zuV8xmx096B/m96liJFTrwZpea\n58op3W6ZULctEpQNgIkyae20bjxl4f99JhZRUlonoPfx/rBIMFc=" - - secure: "pgMYS/6MQqDGb58qdzTJesvAMmcJWTUEEM8gf9rVbfqfxceOL4Xpx8siR9B2\nC4U4MW1cHMPP3RFEb4Jy0uK49aHH10snwZY1S84YPPllpH5ZFXVdN68OayNj\nh4k5N/2hhaaQuJ6Uh8v8s783ye4oYTOW5RJUFqQu4QdG4IkTIMs=" - - NOSE_ARGS="not slow" UPLOAD=true matrix: @@ -41,7 +33,6 @@ before_install: # - export APT_ARGS=-qq # comment this to debug travis install issues # - set -x # enable this to see bash commands - export ZIP_FLAGS=-q # comment this to debug travis install issues - - source ci/envars.sh # we need to source this to bring in the envars - ci/before_install.sh - python -V diff --git a/ci/before_install.sh b/ci/before_install.sh index 677ddfa642f80..e4376e1bf21c2 100755 --- a/ci/before_install.sh +++ b/ci/before_install.sh @@ -10,27 +10,4 @@ echo "inside $0" # overview sudo apt-get update $APT_ARGS # run apt-get update for all versions -if $PLEASE_TRAVIS_FASTER ; then - echo "Faster? well... I'll try." - - if $CACHE_FILE_AVAILABLE ; then - echo retrieving "$CACHE_FILE_URL"; - - wget -q "$CACHE_FILE_URL" -O "/tmp/_$CYTHON_HASH.zip"; - unzip $ZIP_FLAGS /tmp/_"$CYTHON_HASH.zip" -d "$BUILD_CACHE_DIR"; - rm -f /tmp/_"$CYTHON_HASH.zip" - # copy cythonized c files over - cp -R "$BUILD_CACHE_DIR"/pandas/*.c pandas/ - cp -R "$BUILD_CACHE_DIR"/pandas/src/*.c pandas/src/ - fi; - echo "VENV_FILE_AVAILABLE=$VENV_FILE_AVAILABLE" - if $VENV_FILE_AVAILABLE ; then - echo "getting venv" - wget -q $VENV_FILE_URL -O "/tmp/venv.zip"; - sudo unzip $ZIP_FLAGS -o /tmp/venv.zip -d "/"; - sudo chown travis -R "$VIRTUAL_ENV" - rm -f /tmp/_"$CYTHON_HASH.zip" - fi; -fi - true # never fail because bad things happened here diff --git a/ci/envars.sh b/ci/envars.sh deleted file mode 100755 index 2b4cacfd96fe4..0000000000000 --- a/ci/envars.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/bash - -# This must be sourced by .travis.yml, so any envars exported here will -# be available to the rest of the build stages - -# - computes a hash based on the cython files in the codebade -# - retrieves the decrypted key if any for all whitelisted forks -# - checks whether the user optd int to use the cache -# - if so, check for availablity of cache files on the server, based on hash -# - set envars to control what the following scripts do - -# at most one of these will decrypt, so the end result is that $STORE_KEY -# either holds a single key or does not -export STORE_KEY="$STORE_KEY0""$STORE_KEY1""$STORE_KEY2""$STORE_KEY3""$STORE_KEY4" -export STORE_KEY="$STORE_KEY""$STORE_KEY5""$STORE_KEY6""$STORE_KEY7" - -export CYTHON_HASH=$(find pandas | grep -P '\.(pyx|pxd)$' | sort \ - | while read N; do echo $(tail -n+1 $N | md5sum ) ;done | md5sum| cut -d ' ' -f 1) - -export CYTHON_HASH=$CYTHON_HASH-$TRAVIS_PYTHON_VERSION - -# where the cache files live on the server -export CACHE_FILE_URL="https://cache27-pypandas.rhcloud.com/static/$STORE_KEY/$CYTHON_HASH.zip" -export VENV_FILE_URL="https://cache27-pypandas.rhcloud.com/static/$STORE_KEY/venv-$TRAVIS_PYTHON_VERSION.zip" -export CACHE_FILE_STORE_URL="https://cache27-pypandas.rhcloud.com/store/$STORE_KEY" - -echo "Hashing:" -find pandas | grep -P '\.(pyx|pxd)$' -echo "Key: $CYTHON_HASH" - -export CACHE_FILE_AVAILABLE=false -export VENV_FILE_AVAILABLE=false -export PLEASE_TRAVIS_FASTER=false - -# check whether the user opted in to use the cache via commit message -if [ x"$(git log --format='%B' -n 1 | grep PLEASE_TRAVIS_FASTER | wc -l)" != x"0" ]; then - export PLEASE_TRAVIS_FASTER=true -fi; -if [ x"$(git log --format='%B' -n 1 | grep PTF | wc -l)" != x"0" ]; then - export PLEASE_TRAVIS_FASTER=true -fi; - -if $PLEASE_TRAVIS_FASTER; then - - # check whether the files exists on the server - curl -s -f -I "$CACHE_FILE_URL" # silent, don;t expose key - if [ x"$?" == x"0" ] ; then - export CACHE_FILE_AVAILABLE=true; - fi - - - curl -s -f -I "$VENV_FILE_URL" # silent, don;t expose key - if [ x"$?" == x"0" ] ; then - export VENV_FILE_AVAILABLE=true; - fi - - # the pandas build cache machinery needs this set, and the directory created - export BUILD_CACHE_DIR="/tmp/build_cache" - mkdir "$BUILD_CACHE_DIR" -fi; - -# debug -echo "PLEASE_TRAVIS_FASTER=$PLEASE_TRAVIS_FASTER" -echo "CACHE_FILE_AVAILABLE=$CACHE_FILE_AVAILABLE" -echo "VENV_FILE_AVAILABLE=$VENV_FILE_AVAILABLE" - -true diff --git a/ci/install.sh b/ci/install.sh index 294db286a1001..60ea5643c6ad2 100755 --- a/ci/install.sh +++ b/ci/install.sh @@ -45,102 +45,60 @@ if [ x"$FULL_DEPS" == x"true" ] ; then fi fi -# Everything installed inside this clause into site-packages -# will get included in the cached venv downloaded from the net -# in PTF mode -if ( ! $VENV_FILE_AVAILABLE ); then - echo "Running full monty" - # Hard Deps - pip install $PIP_ARGS nose python-dateutil pytz +# Hard Deps +pip install $PIP_ARGS nose python-dateutil pytz +pip install $PIP_ARGS cython + +if [ ${TRAVIS_PYTHON_VERSION} == "3.3" ]; then # should be >=3,3 + pip install $PIP_ARGS numpy==1.7.0 +elif [ ${TRAVIS_PYTHON_VERSION} == "3.2" ]; then + # sudo apt-get $APT_ARGS install python3-numpy; # 1.6.2 or precise + pip install $PIP_ARGS numpy==1.6.1 +else + pip install $PIP_ARGS numpy==1.6.1 +fi + +# Optional Deps +if [ x"$FULL_DEPS" == x"true" ]; then + echo "Installing FULL_DEPS" pip install $PIP_ARGS cython - if [ ${TRAVIS_PYTHON_VERSION} == "3.3" ]; then # should be >=3,3 - pip install $PIP_ARGS numpy==1.7.0 - elif [ ${TRAVIS_PYTHON_VERSION} == "3.2" ]; then - # sudo apt-get $APT_ARGS install python3-numpy; # 1.6.2 or precise - pip install $PIP_ARGS numpy==1.6.1 + if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then + pip install $PIP_ARGS xlwt + pip install $PIP_ARGS bottleneck + pip install $PIP_ARGS numexpr==2.0.1 + pip install $PIP_ARGS tables==2.3.1 else - pip install $PIP_ARGS numpy==1.6.1 + pip install $PIP_ARGS numexpr + pip install $PIP_ARGS tables fi - # Optional Deps - if [ x"$FULL_DEPS" == x"true" ]; then - echo "Installing FULL_DEPS" - pip install $PIP_ARGS cython - - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then - pip install $PIP_ARGS xlwt - pip install $PIP_ARGS bottleneck - pip install $PIP_ARGS numexpr==2.0.1 - pip install $PIP_ARGS tables==2.3.1 - else - pip install $PIP_ARGS numexpr - pip install $PIP_ARGS tables - fi - - pip install $PIP_ARGS matplotlib - pip install $PIP_ARGS openpyxl - pip install $PIP_ARGS xlrd>=0.9.0 - pip install $PIP_ARGS 'http://downloads.sourceforge.net/project/pytseries/scikits.timeseries/0.91.3/scikits.timeseries-0.91.3.tar.gz?r=' - pip install $PIP_ARGS patsy - pip install $PIP_ARGS html5lib - - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "3" ]; then - sudo apt-get $APT_ARGS remove python3-lxml - elif [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then - sudo apt-get $APT_ARGS remove python-lxml - fi - - pip install $PIP_ARGS lxml - # fool statsmodels into thinking pandas was already installed - # so it won't refuse to install itself. We want it in the zipped venv - - mkdir $SITE_PKG_DIR/pandas - touch $SITE_PKG_DIR/pandas/__init__.py - echo "version='0.10.0-phony'" > $SITE_PKG_DIR/pandas/version.py - pip install $PIP_ARGS git+git://github.com/statsmodels/statsmodels@c9062e43b8a5f7385537ca95#egg=statsmodels - - rm -Rf $SITE_PKG_DIR/pandas # scrub phoney pandas + pip install $PIP_ARGS matplotlib + pip install $PIP_ARGS openpyxl + pip install $PIP_ARGS xlrd>=0.9.0 + pip install $PIP_ARGS 'http://downloads.sourceforge.net/project/pytseries/scikits.timeseries/0.91.3/scikits.timeseries-0.91.3.tar.gz?r=' + pip install $PIP_ARGS patsy + pip install $PIP_ARGS html5lib + + if [ ${TRAVIS_PYTHON_VERSION:0:1} == "3" ]; then + sudo apt-get $APT_ARGS remove python3-lxml + elif [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then + sudo apt-get $APT_ARGS remove python-lxml fi - # pack up the venv and cache it - if [ x"$STORE_KEY" != x"" ] && $UPLOAD && $PLEASE_TRAVIS_FASTER ; then - VENV_FNAME="venv-$TRAVIS_PYTHON_VERSION.zip" - - zip $ZIP_FLAGS -r "$HOME/$VENV_FNAME" $SITE_PKG_DIR/ - ls -l "$HOME/$VENV_FNAME" - echo "posting venv" - # silent, don't expose key - curl -s --form upload=@"$HOME/$VENV_FNAME" "$CACHE_FILE_STORE_URL/$VENV_FNAME" - fi + pip install $PIP_ARGS lxml + # fool statsmodels into thinking pandas was already installed + # so it won't refuse to install itself. -fi; + mkdir $SITE_PKG_DIR/pandas + touch $SITE_PKG_DIR/pandas/__init__.py + echo "version='0.10.0-phony'" > $SITE_PKG_DIR/pandas/version.py + pip install $PIP_ARGS git+git://github.com/statsmodels/statsmodels@c9062e43b8a5f7385537ca95#egg=statsmodels -#build and install pandas -if [ x"$BUILD_CACHE_DIR" != x"" ]; then - scripts/use_build_cache.py -d - python setup.py install; -else - python setup.py build_ext install + rm -Rf $SITE_PKG_DIR/pandas # scrub phoney pandas fi -# package pandas build artifacts and send them home -# that's everything the build cache (scripts/use_build_cache.py) -# stored during the build (.so, pyx->.c and 2to3) -if (! $CACHE_FILE_AVAILABLE) ; then - if [ x"$STORE_KEY" != x"" ] && $UPLOAD && $PLEASE_TRAVIS_FASTER ; then - echo "Posting artifacts" - strip "$BUILD_CACHE_DIR/*" &> /dev/null - echo "$BUILD_CACHE_DIR" - cd "$BUILD_CACHE_DIR"/ - zip -r $ZIP_FLAGS "$HOME/$CYTHON_HASH".zip * - cd "$TRAVIS_BUILD_DIR" - pwd - zip "$HOME/$CYTHON_HASH".zip $(find pandas | grep -P '\.(pyx|pxd)$' | sed -r 's/.(pyx|pxd)$/.c/') - - # silent, don't expose key - curl --connect-timeout 5 -s --form upload=@"$HOME/$CYTHON_HASH".zip "$CACHE_FILE_STORE_URL/$CYTHON_HASH.zip" - fi -fi +# build pandas +python setup.py build_ext install true diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py index 2a2a5c9643c75..d019af3370ba9 100755 --- a/vb_suite/test_perf.py +++ b/vb_suite/test_perf.py @@ -37,18 +37,10 @@ import random import numpy as np -import pandas as pd from pandas import DataFrame, Series -try: - import git # gitpython -except Exception: - print("Error: Please install the `gitpython` package\n") - sys.exit(1) - from suite import REPO_PATH -VB_DIR = os.path.dirname(os.path.abspath(__file__)) DEFAULT_MIN_DURATION = 0.01 HEAD_COL="head[ms]" BASE_COL="base[ms]" @@ -65,14 +57,6 @@ parser.add_argument('-t', '--target-commit', help='The commit to compare against the baseline (default: HEAD).', type=str) -parser.add_argument('--base-pickle', - help='name of pickle file with timings data generated by a former `-H -d FILE` run. '\ - 'filename must be of the form -*.* or specify --base-commit seperately', - type=str) -parser.add_argument('--target-pickle', - help='name of pickle file with timings data generated by a former `-H -d FILE` run '\ - 'filename must be of the form -*.* or specify --target-commit seperately', - type=str) parser.add_argument('-m', '--min-duration', help='Minimum duration (in ms) of baseline test for inclusion in report (default: %.3f).' % DEFAULT_MIN_DURATION, type=float, @@ -85,7 +69,7 @@ metavar="FNAME", dest='outdf', default=None, - help='Name of file to df.to_pickle() the result table into. Will overwrite') + help='Name of file to df.save() the result table into. Will overwrite') parser.add_argument('-r', '--regex', metavar="REGEX", dest='regex', @@ -120,7 +104,8 @@ parser.add_argument('-a', '--affinity', metavar="a", dest='affinity', - default=None, + default=1, + type=int, help='set processor affinity of processm by default bind to cpu/core #1 only' 'requires the "affinity" python module , will raise Warning otherwise' ) @@ -221,74 +206,30 @@ def profile_comparative(benchmarks): head_res = get_results_df(db, h_head) baseline_res = get_results_df(db, h_baseline) - - report_comparative(head_res,baseline_res) - + ratio = head_res['timing'] / baseline_res['timing'] + totals = DataFrame({HEAD_COL:head_res['timing'], + BASE_COL:baseline_res['timing'], + 'ratio':ratio, + 'name':baseline_res.name}, + columns=[HEAD_COL, BASE_COL, "ratio", "name"]) + totals = totals.ix[totals[HEAD_COL] > args.min_duration] + # ignore below threshold + totals = totals.dropna( + ).sort("ratio").set_index('name') # sort in ascending order + + h_msg = repo.messages.get(h_head, "") + b_msg = repo.messages.get(h_baseline, "") + + print_report(totals,h_head=h_head,h_msg=h_msg, + h_baseline=h_baseline,b_msg=b_msg) + + if args.outdf: + prprint("The results DataFrame was written to '%s'\n" % args.outdf) + totals.save(args.outdf) finally: # print("Disposing of TMP_DIR: %s" % TMP_DIR) shutil.rmtree(TMP_DIR) -def prep_pickle_for_total(df, agg_name='median'): - """ - accepts a datafram resulting from invocation with -H -d o.pickle - If multiple data columns are present (-N was used), the - `agg_name` attr of the datafram will be used to reduce - them to a single value per vbench, df.median is used by defa - ult. - - Returns a datadrame of the form expected by prep_totals - """ - def prep(df): - agg = getattr(df,agg_name) - df = DataFrame(agg(1)) - cols = list(df.columns) - cols[0]='timing' - df.columns=cols - df['name'] = list(df.index) - return df - - return prep(df) - -def prep_totals(head_res, baseline_res): - """ - Each argument should be a dataframe with 'timing' and 'name' columns - where name is the name of the vbench. - - returns a 'totals' dataframe, suitable as input for print_report. - """ - head_res, baseline_res = head_res.align(baseline_res) - ratio = head_res['timing'] / baseline_res['timing'] - totals = DataFrame({HEAD_COL:head_res['timing'], - BASE_COL:baseline_res['timing'], - 'ratio':ratio, - 'name':baseline_res.name}, - columns=[HEAD_COL, BASE_COL, "ratio", "name"]) - totals = totals.ix[totals[HEAD_COL] > args.min_duration] - # ignore below threshold - totals = totals.dropna( - ).sort("ratio").set_index('name') # sort in ascending order - return totals - -def report_comparative(head_res,baseline_res): - try: - r=git.Repo(VB_DIR) - except: - import pdb - pdb.set_trace() - - totals = prep_totals(head_res,baseline_res) - - h_head = args.target_commit - h_baseline = args.base_commit - h_msg = r.commit(h_head).message.strip() - b_msg = r.commit(h_baseline).message.strip() - - print_report(totals,h_head=h_head,h_msg=h_msg, - h_baseline=h_baseline,b_msg=b_msg) - - if args.outdf: - prprint("The results DataFrame was written to '%s'\n" % args.outdf) - totals.to_pickle(args.outdf) def profile_head_single(benchmark): import gc @@ -364,7 +305,7 @@ def profile_head(benchmarks): if args.outdf: prprint("The results DataFrame was written to '%s'\n" % args.outdf) - DataFrame(results).to_pickle(args.outdf) + DataFrame(results).save(args.outdf) def print_report(df,h_head=None,h_msg="",h_baseline=None,b_msg=""): @@ -447,23 +388,18 @@ def main(): random.seed(args.seed) np.random.seed(args.seed) - if args.base_pickle and args.target_pickle: - baseline_res = prep_pickle_for_total(pd.read_pickle(args.base_pickle)) - target_res = prep_pickle_for_total(pd.read_pickle(args.target_pickle)) - - report_comparative(target_res, baseline_res) - sys.exit(0) - - if args.affinity is not None: - try: - import affinity - - affinity.set_process_affinity_mask(0,args.affinity) - assert affinity.get_process_affinity_mask(0) == args.affinity - print("CPU affinity set to %d" % args.affinity) - except ImportError: - print("-a/--afinity specified, but the 'affinity' module is not available, aborting.\n") - sys.exit(1) + try: + import affinity + affinity.set_process_affinity_mask(0,args.affinity) + assert affinity.get_process_affinity_mask(0) == args.affinity + print("CPU affinity set to %d" % args.affinity) + except ImportError: + import warnings + print("\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"+ + "The 'affinity' module is not available, results may be unreliable\n" + + "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n" + ) + time.sleep(2) print("\n") prprint("LOG_FILE = %s" % args.log_file) @@ -543,39 +479,10 @@ def inner(repo_path): if __name__ == '__main__': args = parser.parse_args() - if (not args.head - and not (args.base_commit and args.target_commit) - and not (args.base_pickle and args.target_pickle)): + if not args.head and (not args.base_commit and not args.target_commit): parser.print_help() - sys.exit(1) - elif ((args.base_pickle or args.target_pickle) and not - (args.base_pickle and args.target_pickle)): - print("Must specify Both --base-pickle and --target-pickle.") - sys.exit(1) - - if ((args.base_pickle or args.target_pickle) and not - (args.base_commit and args.target_commit)): - if not args.base_commit: - print("base_commit not specified, Assuming base_pickle is named -foo.*") - args.base_commit = args.base_pickle.split('-')[0] - if not args.target_commit: - print("target_commit not specified, Assuming target_pickle is named -foo.*") - args.target_commit = args.target_pickle.split('-')[0] - - import warnings - warnings.filterwarnings('ignore',category=FutureWarning) - warnings.filterwarnings('ignore',category=DeprecationWarning) - - if args.base_commit and args.target_commit: - print("Verifying specified commits exist in repo...") - r=git.Repo(VB_DIR) - for c in [ args.base_commit, args.target_commit ]: - try: - msg = r.commit(c).message.strip() - except git.BadObject: - print("The commit '%s' was not found, aborting" % c) - sys.exit(1) - else: - print("%s: %s" % (c,msg)) - - main() + else: + import warnings + warnings.filterwarnings('ignore',category=FutureWarning) + warnings.filterwarnings('ignore',category=DeprecationWarning) + main()